1 /* Math module -- standard C math library functions, pi and e */
2
3 /* Here are some comments from Tim Peters, extracted from the
4 discussion attached to http://bugs.python.org/issue1640. They
5 describe the general aims of the math module with respect to
6 special values, IEEE-754 floating-point exceptions, and Python
7 exceptions.
8
9 These are the "spirit of 754" rules:
10
11 1. If the mathematical result is a real number, but of magnitude too
12 large to approximate by a machine float, overflow is signaled and the
13 result is an infinity (with the appropriate sign).
14
15 2. If the mathematical result is a real number, but of magnitude too
16 small to approximate by a machine float, underflow is signaled and the
17 result is a zero (with the appropriate sign).
18
19 3. At a singularity (a value x such that the limit of f(y) as y
20 approaches x exists and is an infinity), "divide by zero" is signaled
21 and the result is an infinity (with the appropriate sign). This is
22 complicated a little by that the left-side and right-side limits may
23 not be the same; e.g., 1/x approaches +inf or -inf as x approaches 0
24 from the positive or negative directions. In that specific case, the
25 sign of the zero determines the result of 1/0.
26
27 4. At a point where a function has no defined result in the extended
28 reals (i.e., the reals plus an infinity or two), invalid operation is
29 signaled and a NaN is returned.
30
31 And these are what Python has historically /tried/ to do (but not
32 always successfully, as platform libm behavior varies a lot):
33
34 For #1, raise OverflowError.
35
36 For #2, return a zero (with the appropriate sign if that happens by
37 accident ;-)).
38
39 For #3 and #4, raise ValueError. It may have made sense to raise
40 Python's ZeroDivisionError in #3, but historically that's only been
41 raised for division by zero and mod by zero.
42
43 */
44
45 /*
46 In general, on an IEEE-754 platform the aim is to follow the C99
47 standard, including Annex 'F', whenever possible. Where the
48 standard recommends raising the 'divide-by-zero' or 'invalid'
49 floating-point exceptions, Python should raise a ValueError. Where
50 the standard recommends raising 'overflow', Python should raise an
51 OverflowError. In all other circumstances a value should be
52 returned.
53 */
54
55 #ifndef Py_BUILD_CORE_BUILTIN
56 # define Py_BUILD_CORE_MODULE 1
57 #endif
58
59 #include "Python.h"
60 #include "pycore_abstract.h" // _PyNumber_Index()
61 #include "pycore_bitutils.h" // _Py_bit_length()
62 #include "pycore_call.h" // _PyObject_CallNoArgs()
63 #include "pycore_long.h" // _PyLong_GetZero()
64 #include "pycore_moduleobject.h" // _PyModule_GetState()
65 #include "pycore_object.h" // _PyObject_LookupSpecial()
66 #include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR
67 /* For DBL_EPSILON in _math.h */
68 #include <float.h>
69 /* For _Py_log1p with workarounds for buggy handling of zeros. */
70 #include "_math.h"
71 #include <stdbool.h>
72
73 #include "clinic/mathmodule.c.h"
74
75 /*[clinic input]
76 module math
77 [clinic start generated code]*/
78 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
79
80
81 typedef struct {
82 PyObject *str___ceil__;
83 PyObject *str___floor__;
84 PyObject *str___trunc__;
85 } math_module_state;
86
87 static inline math_module_state*
get_math_module_state(PyObject * module)88 get_math_module_state(PyObject *module)
89 {
90 void *state = _PyModule_GetState(module);
91 assert(state != NULL);
92 return (math_module_state *)state;
93 }
94
95 /*
96 Double and triple length extended precision algorithms from:
97
98 Accurate Sum and Dot Product
99 by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
100 https://doi.org/10.1137/030601818
101 https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
102
103 */
104
105 typedef struct{ double hi; double lo; } DoubleLength;
106
107 static DoubleLength
dl_fast_sum(double a,double b)108 dl_fast_sum(double a, double b)
109 {
110 /* Algorithm 1.1. Compensated summation of two floating-point numbers. */
111 assert(fabs(a) >= fabs(b));
112 double x = a + b;
113 double y = (a - x) + b;
114 return (DoubleLength) {x, y};
115 }
116
117 static DoubleLength
dl_sum(double a,double b)118 dl_sum(double a, double b)
119 {
120 /* Algorithm 3.1 Error-free transformation of the sum */
121 double x = a + b;
122 double z = x - a;
123 double y = (a - (x - z)) + (b - z);
124 return (DoubleLength) {x, y};
125 }
126
127 #ifndef UNRELIABLE_FMA
128
129 static DoubleLength
dl_mul(double x,double y)130 dl_mul(double x, double y)
131 {
132 /* Algorithm 3.5. Error-free transformation of a product */
133 double z = x * y;
134 double zz = fma(x, y, -z);
135 return (DoubleLength) {z, zz};
136 }
137
138 #else
139
140 /*
141 The default implementation of dl_mul() depends on the C math library
142 having an accurate fma() function as required by § 7.12.13.1 of the
143 C99 standard.
144
145 The UNRELIABLE_FMA option is provided as a slower but accurate
146 alternative for builds where the fma() function is found wanting.
147 The speed penalty may be modest (17% slower on an Apple M1 Max),
148 so don't hesitate to enable this build option.
149
150 The algorithms are from the T. J. Dekker paper:
151 A Floating-Point Technique for Extending the Available Precision
152 https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
153 */
154
155 static DoubleLength
dl_split(double x)156 dl_split(double x) {
157 // Dekker (5.5) and (5.6).
158 double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
159 double hi = t - (t - x);
160 double lo = x - hi;
161 return (DoubleLength) {hi, lo};
162 }
163
164 static DoubleLength
dl_mul(double x,double y)165 dl_mul(double x, double y)
166 {
167 // Dekker (5.12) and mul12()
168 DoubleLength xx = dl_split(x);
169 DoubleLength yy = dl_split(y);
170 double p = xx.hi * yy.hi;
171 double q = xx.hi * yy.lo + xx.lo * yy.hi;
172 double z = p + q;
173 double zz = p - z + q + xx.lo * yy.lo;
174 return (DoubleLength) {z, zz};
175 }
176
177 #endif
178
179 typedef struct { double hi; double lo; double tiny; } TripleLength;
180
181 static const TripleLength tl_zero = {0.0, 0.0, 0.0};
182
183 static TripleLength
tl_fma(double x,double y,TripleLength total)184 tl_fma(double x, double y, TripleLength total)
185 {
186 /* Algorithm 5.10 with SumKVert for K=3 */
187 DoubleLength pr = dl_mul(x, y);
188 DoubleLength sm = dl_sum(total.hi, pr.hi);
189 DoubleLength r1 = dl_sum(total.lo, pr.lo);
190 DoubleLength r2 = dl_sum(r1.hi, sm.lo);
191 return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
192 }
193
194 static double
tl_to_d(TripleLength total)195 tl_to_d(TripleLength total)
196 {
197 DoubleLength last = dl_sum(total.lo, total.hi);
198 return total.tiny + last.lo + last.hi;
199 }
200
201
202 /*
203 sin(pi*x), giving accurate results for all finite x (especially x
204 integral or close to an integer). This is here for use in the
205 reflection formula for the gamma function. It conforms to IEEE
206 754-2008 for finite arguments, but not for infinities or nans.
207 */
208
209 static const double pi = 3.141592653589793238462643383279502884197;
210 static const double logpi = 1.144729885849400174143427351353058711647;
211
212 /* Version of PyFloat_AsDouble() with in-line fast paths
213 for exact floats and integers. Gives a substantial
214 speed improvement for extracting float arguments.
215 */
216
217 #define ASSIGN_DOUBLE(target_var, obj, error_label) \
218 if (PyFloat_CheckExact(obj)) { \
219 target_var = PyFloat_AS_DOUBLE(obj); \
220 } \
221 else if (PyLong_CheckExact(obj)) { \
222 target_var = PyLong_AsDouble(obj); \
223 if (target_var == -1.0 && PyErr_Occurred()) { \
224 goto error_label; \
225 } \
226 } \
227 else { \
228 target_var = PyFloat_AsDouble(obj); \
229 if (target_var == -1.0 && PyErr_Occurred()) { \
230 goto error_label; \
231 } \
232 }
233
234 static double
m_sinpi(double x)235 m_sinpi(double x)
236 {
237 double y, r;
238 int n;
239 /* this function should only ever be called for finite arguments */
240 assert(Py_IS_FINITE(x));
241 y = fmod(fabs(x), 2.0);
242 n = (int)round(2.0*y);
243 assert(0 <= n && n <= 4);
244 switch (n) {
245 case 0:
246 r = sin(pi*y);
247 break;
248 case 1:
249 r = cos(pi*(y-0.5));
250 break;
251 case 2:
252 /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
253 -0.0 instead of 0.0 when y == 1.0. */
254 r = sin(pi*(1.0-y));
255 break;
256 case 3:
257 r = -cos(pi*(y-1.5));
258 break;
259 case 4:
260 r = sin(pi*(y-2.0));
261 break;
262 default:
263 Py_UNREACHABLE();
264 }
265 return copysign(1.0, x)*r;
266 }
267
268 /* Implementation of the real gamma function. Kept here to work around
269 issues (see e.g. gh-70309) with quality of libm's tgamma/lgamma implementations
270 on various platforms (Windows, MacOS). In extensive but non-exhaustive
271 random tests, this function proved accurate to within <= 10 ulps across the
272 entire float domain. Note that accuracy may depend on the quality of the
273 system math functions, the pow function in particular. Special cases
274 follow C99 annex F. The parameters and method are tailored to platforms
275 whose double format is the IEEE 754 binary64 format.
276
277 Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
278 and g=6.024680040776729583740234375; these parameters are amongst those
279 used by the Boost library. Following Boost (again), we re-express the
280 Lanczos sum as a rational function, and compute it that way. The
281 coefficients below were computed independently using MPFR, and have been
282 double-checked against the coefficients in the Boost source code.
283
284 For x < 0.0 we use the reflection formula.
285
286 There's one minor tweak that deserves explanation: Lanczos' formula for
287 Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5). For many x
288 values, x+g-0.5 can be represented exactly. However, in cases where it
289 can't be represented exactly the small error in x+g-0.5 can be magnified
290 significantly by the pow and exp calls, especially for large x. A cheap
291 correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
292 involved in the computation of x+g-0.5 (that is, e = computed value of
293 x+g-0.5 - exact value of x+g-0.5). Here's the proof:
294
295 Correction factor
296 -----------------
297 Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
298 double, and e is tiny. Then:
299
300 pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
301 = pow(y, x-0.5)/exp(y) * C,
302
303 where the correction_factor C is given by
304
305 C = pow(1-e/y, x-0.5) * exp(e)
306
307 Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so:
308
309 C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
310
311 But y-(x-0.5) = g+e, and g+e ~ g. So we get C ~ 1 + e*g/y, and
312
313 pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
314
315 Note that for accuracy, when computing r*C it's better to do
316
317 r + e*g/y*r;
318
319 than
320
321 r * (1 + e*g/y);
322
323 since the addition in the latter throws away most of the bits of
324 information in e*g/y.
325 */
326
327 #define LANCZOS_N 13
328 static const double lanczos_g = 6.024680040776729583740234375;
329 static const double lanczos_g_minus_half = 5.524680040776729583740234375;
330 static const double lanczos_num_coeffs[LANCZOS_N] = {
331 23531376880.410759688572007674451636754734846804940,
332 42919803642.649098768957899047001988850926355848959,
333 35711959237.355668049440185451547166705960488635843,
334 17921034426.037209699919755754458931112671403265390,
335 6039542586.3520280050642916443072979210699388420708,
336 1439720407.3117216736632230727949123939715485786772,
337 248874557.86205415651146038641322942321632125127801,
338 31426415.585400194380614231628318205362874684987640,
339 2876370.6289353724412254090516208496135991145378768,
340 186056.26539522349504029498971604569928220784236328,
341 8071.6720023658162106380029022722506138218516325024,
342 210.82427775157934587250973392071336271166969580291,
343 2.5066282746310002701649081771338373386264310793408
344 };
345
346 /* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
347 static const double lanczos_den_coeffs[LANCZOS_N] = {
348 0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
349 13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};
350
351 /* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */
352 #define NGAMMA_INTEGRAL 23
353 static const double gamma_integral[NGAMMA_INTEGRAL] = {
354 1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
355 3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
356 1307674368000.0, 20922789888000.0, 355687428096000.0,
357 6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
358 51090942171709440000.0, 1124000727777607680000.0,
359 };
360
361 /* Lanczos' sum L_g(x), for positive x */
362
363 static double
lanczos_sum(double x)364 lanczos_sum(double x)
365 {
366 double num = 0.0, den = 0.0;
367 int i;
368 assert(x > 0.0);
369 /* evaluate the rational function lanczos_sum(x). For large
370 x, the obvious algorithm risks overflow, so we instead
371 rescale the denominator and numerator of the rational
372 function by x**(1-LANCZOS_N) and treat this as a
373 rational function in 1/x. This also reduces the error for
374 larger x values. The choice of cutoff point (5.0 below) is
375 somewhat arbitrary; in tests, smaller cutoff values than
376 this resulted in lower accuracy. */
377 if (x < 5.0) {
378 for (i = LANCZOS_N; --i >= 0; ) {
379 num = num * x + lanczos_num_coeffs[i];
380 den = den * x + lanczos_den_coeffs[i];
381 }
382 }
383 else {
384 for (i = 0; i < LANCZOS_N; i++) {
385 num = num / x + lanczos_num_coeffs[i];
386 den = den / x + lanczos_den_coeffs[i];
387 }
388 }
389 return num/den;
390 }
391
392
393 static double
m_tgamma(double x)394 m_tgamma(double x)
395 {
396 double absx, r, y, z, sqrtpow;
397
398 /* special cases */
399 if (!Py_IS_FINITE(x)) {
400 if (Py_IS_NAN(x) || x > 0.0)
401 return x; /* tgamma(nan) = nan, tgamma(inf) = inf */
402 else {
403 errno = EDOM;
404 return Py_NAN; /* tgamma(-inf) = nan, invalid */
405 }
406 }
407 if (x == 0.0) {
408 errno = EDOM;
409 /* tgamma(+-0.0) = +-inf, divide-by-zero */
410 return copysign(Py_INFINITY, x);
411 }
412
413 /* integer arguments */
414 if (x == floor(x)) {
415 if (x < 0.0) {
416 errno = EDOM; /* tgamma(n) = nan, invalid for */
417 return Py_NAN; /* negative integers n */
418 }
419 if (x <= NGAMMA_INTEGRAL)
420 return gamma_integral[(int)x - 1];
421 }
422 absx = fabs(x);
423
424 /* tiny arguments: tgamma(x) ~ 1/x for x near 0 */
425 if (absx < 1e-20) {
426 r = 1.0/x;
427 if (Py_IS_INFINITY(r))
428 errno = ERANGE;
429 return r;
430 }
431
432 /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
433 x > 200, and underflows to +-0.0 for x < -200, not a negative
434 integer. */
435 if (absx > 200.0) {
436 if (x < 0.0) {
437 return 0.0/m_sinpi(x);
438 }
439 else {
440 errno = ERANGE;
441 return Py_HUGE_VAL;
442 }
443 }
444
445 y = absx + lanczos_g_minus_half;
446 /* compute error in sum */
447 if (absx > lanczos_g_minus_half) {
448 /* note: the correction can be foiled by an optimizing
449 compiler that (incorrectly) thinks that an expression like
450 a + b - a - b can be optimized to 0.0. This shouldn't
451 happen in a standards-conforming compiler. */
452 double q = y - absx;
453 z = q - lanczos_g_minus_half;
454 }
455 else {
456 double q = y - lanczos_g_minus_half;
457 z = q - absx;
458 }
459 z = z * lanczos_g / y;
460 if (x < 0.0) {
461 r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
462 r -= z * r;
463 if (absx < 140.0) {
464 r /= pow(y, absx - 0.5);
465 }
466 else {
467 sqrtpow = pow(y, absx / 2.0 - 0.25);
468 r /= sqrtpow;
469 r /= sqrtpow;
470 }
471 }
472 else {
473 r = lanczos_sum(absx) / exp(y);
474 r += z * r;
475 if (absx < 140.0) {
476 r *= pow(y, absx - 0.5);
477 }
478 else {
479 sqrtpow = pow(y, absx / 2.0 - 0.25);
480 r *= sqrtpow;
481 r *= sqrtpow;
482 }
483 }
484 if (Py_IS_INFINITY(r))
485 errno = ERANGE;
486 return r;
487 }
488
489 /*
490 lgamma: natural log of the absolute value of the Gamma function.
491 For large arguments, Lanczos' formula works extremely well here.
492 */
493
494 static double
m_lgamma(double x)495 m_lgamma(double x)
496 {
497 double r;
498 double absx;
499
500 /* special cases */
501 if (!Py_IS_FINITE(x)) {
502 if (Py_IS_NAN(x))
503 return x; /* lgamma(nan) = nan */
504 else
505 return Py_HUGE_VAL; /* lgamma(+-inf) = +inf */
506 }
507
508 /* integer arguments */
509 if (x == floor(x) && x <= 2.0) {
510 if (x <= 0.0) {
511 errno = EDOM; /* lgamma(n) = inf, divide-by-zero for */
512 return Py_HUGE_VAL; /* integers n <= 0 */
513 }
514 else {
515 return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
516 }
517 }
518
519 absx = fabs(x);
520 /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
521 if (absx < 1e-20)
522 return -log(absx);
523
524 /* Lanczos' formula. We could save a fraction of a ulp in accuracy by
525 having a second set of numerator coefficients for lanczos_sum that
526 absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
527 subtraction below; it's probably not worth it. */
528 r = log(lanczos_sum(absx)) - lanczos_g;
529 r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
530 if (x < 0.0)
531 /* Use reflection formula to get value for negative x. */
532 r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
533 if (Py_IS_INFINITY(r))
534 errno = ERANGE;
535 return r;
536 }
537
538 /*
539 wrapper for atan2 that deals directly with special cases before
540 delegating to the platform libm for the remaining cases. This
541 is necessary to get consistent behaviour across platforms.
542 Windows, FreeBSD and alpha Tru64 are amongst platforms that don't
543 always follow C99.
544 */
545
546 static double
m_atan2(double y,double x)547 m_atan2(double y, double x)
548 {
549 if (Py_IS_NAN(x) || Py_IS_NAN(y))
550 return Py_NAN;
551 if (Py_IS_INFINITY(y)) {
552 if (Py_IS_INFINITY(x)) {
553 if (copysign(1., x) == 1.)
554 /* atan2(+-inf, +inf) == +-pi/4 */
555 return copysign(0.25*Py_MATH_PI, y);
556 else
557 /* atan2(+-inf, -inf) == +-pi*3/4 */
558 return copysign(0.75*Py_MATH_PI, y);
559 }
560 /* atan2(+-inf, x) == +-pi/2 for finite x */
561 return copysign(0.5*Py_MATH_PI, y);
562 }
563 if (Py_IS_INFINITY(x) || y == 0.) {
564 if (copysign(1., x) == 1.)
565 /* atan2(+-y, +inf) = atan2(+-0, +x) = +-0. */
566 return copysign(0., y);
567 else
568 /* atan2(+-y, -inf) = atan2(+-0., -x) = +-pi. */
569 return copysign(Py_MATH_PI, y);
570 }
571 return atan2(y, x);
572 }
573
574
575 /* IEEE 754-style remainder operation: x - n*y where n*y is the nearest
576 multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754
577 binary floating-point format, the result is always exact. */
578
579 static double
m_remainder(double x,double y)580 m_remainder(double x, double y)
581 {
582 /* Deal with most common case first. */
583 if (Py_IS_FINITE(x) && Py_IS_FINITE(y)) {
584 double absx, absy, c, m, r;
585
586 if (y == 0.0) {
587 return Py_NAN;
588 }
589
590 absx = fabs(x);
591 absy = fabs(y);
592 m = fmod(absx, absy);
593
594 /*
595 Warning: some subtlety here. What we *want* to know at this point is
596 whether the remainder m is less than, equal to, or greater than half
597 of absy. However, we can't do that comparison directly because we
598 can't be sure that 0.5*absy is representable (the multiplication
599 might incur precision loss due to underflow). So instead we compare
600 m with the complement c = absy - m: m < 0.5*absy if and only if m <
601 c, and so on. The catch is that absy - m might also not be
602 representable, but it turns out that it doesn't matter:
603
604 - if m > 0.5*absy then absy - m is exactly representable, by
605 Sterbenz's lemma, so m > c
606 - if m == 0.5*absy then again absy - m is exactly representable
607 and m == c
608 - if m < 0.5*absy then either (i) 0.5*absy is exactly representable,
609 in which case 0.5*absy < absy - m, so 0.5*absy <= c and hence m <
610 c, or (ii) absy is tiny, either subnormal or in the lowest normal
611 binade. Then absy - m is exactly representable and again m < c.
612 */
613
614 c = absy - m;
615 if (m < c) {
616 r = m;
617 }
618 else if (m > c) {
619 r = -c;
620 }
621 else {
622 /*
623 Here absx is exactly halfway between two multiples of absy,
624 and we need to choose the even multiple. x now has the form
625
626 absx = n * absy + m
627
628 for some integer n (recalling that m = 0.5*absy at this point).
629 If n is even we want to return m; if n is odd, we need to
630 return -m.
631
632 So
633
634 0.5 * (absx - m) = (n/2) * absy
635
636 and now reducing modulo absy gives us:
637
638 | m, if n is odd
639 fmod(0.5 * (absx - m), absy) = |
640 | 0, if n is even
641
642 Now m - 2.0 * fmod(...) gives the desired result: m
643 if n is even, -m if m is odd.
644
645 Note that all steps in fmod(0.5 * (absx - m), absy)
646 will be computed exactly, with no rounding error
647 introduced.
648 */
649 assert(m == c);
650 r = m - 2.0 * fmod(0.5 * (absx - m), absy);
651 }
652 return copysign(1.0, x) * r;
653 }
654
655 /* Special values. */
656 if (Py_IS_NAN(x)) {
657 return x;
658 }
659 if (Py_IS_NAN(y)) {
660 return y;
661 }
662 if (Py_IS_INFINITY(x)) {
663 return Py_NAN;
664 }
665 assert(Py_IS_INFINITY(y));
666 return x;
667 }
668
669
670 /*
671 Various platforms (Solaris, OpenBSD) do nonstandard things for log(0),
672 log(-ve), log(NaN). Here are wrappers for log and log10 that deal with
673 special values directly, passing positive non-special values through to
674 the system log/log10.
675 */
676
677 static double
m_log(double x)678 m_log(double x)
679 {
680 if (Py_IS_FINITE(x)) {
681 if (x > 0.0)
682 return log(x);
683 errno = EDOM;
684 if (x == 0.0)
685 return -Py_HUGE_VAL; /* log(0) = -inf */
686 else
687 return Py_NAN; /* log(-ve) = nan */
688 }
689 else if (Py_IS_NAN(x))
690 return x; /* log(nan) = nan */
691 else if (x > 0.0)
692 return x; /* log(inf) = inf */
693 else {
694 errno = EDOM;
695 return Py_NAN; /* log(-inf) = nan */
696 }
697 }
698
699 /*
700 log2: log to base 2.
701
702 Uses an algorithm that should:
703
704 (a) produce exact results for powers of 2, and
705 (b) give a monotonic log2 (for positive finite floats),
706 assuming that the system log is monotonic.
707 */
708
709 static double
m_log2(double x)710 m_log2(double x)
711 {
712 if (!Py_IS_FINITE(x)) {
713 if (Py_IS_NAN(x))
714 return x; /* log2(nan) = nan */
715 else if (x > 0.0)
716 return x; /* log2(+inf) = +inf */
717 else {
718 errno = EDOM;
719 return Py_NAN; /* log2(-inf) = nan, invalid-operation */
720 }
721 }
722
723 if (x > 0.0) {
724 return log2(x);
725 }
726 else if (x == 0.0) {
727 errno = EDOM;
728 return -Py_HUGE_VAL; /* log2(0) = -inf, divide-by-zero */
729 }
730 else {
731 errno = EDOM;
732 return Py_NAN; /* log2(-inf) = nan, invalid-operation */
733 }
734 }
735
736 static double
m_log10(double x)737 m_log10(double x)
738 {
739 if (Py_IS_FINITE(x)) {
740 if (x > 0.0)
741 return log10(x);
742 errno = EDOM;
743 if (x == 0.0)
744 return -Py_HUGE_VAL; /* log10(0) = -inf */
745 else
746 return Py_NAN; /* log10(-ve) = nan */
747 }
748 else if (Py_IS_NAN(x))
749 return x; /* log10(nan) = nan */
750 else if (x > 0.0)
751 return x; /* log10(inf) = inf */
752 else {
753 errno = EDOM;
754 return Py_NAN; /* log10(-inf) = nan */
755 }
756 }
757
758
759 static PyObject *
math_gcd(PyObject * module,PyObject * const * args,Py_ssize_t nargs)760 math_gcd(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
761 {
762 // Fast-path for the common case: gcd(int, int)
763 if (nargs == 2 && PyLong_CheckExact(args[0]) && PyLong_CheckExact(args[1]))
764 {
765 return _PyLong_GCD(args[0], args[1]);
766 }
767
768 if (nargs == 0) {
769 return PyLong_FromLong(0);
770 }
771
772 PyObject *res = PyNumber_Index(args[0]);
773 if (res == NULL) {
774 return NULL;
775 }
776 if (nargs == 1) {
777 Py_SETREF(res, PyNumber_Absolute(res));
778 return res;
779 }
780
781 PyObject *one = _PyLong_GetOne(); // borrowed ref
782 for (Py_ssize_t i = 1; i < nargs; i++) {
783 PyObject *x = _PyNumber_Index(args[i]);
784 if (x == NULL) {
785 Py_DECREF(res);
786 return NULL;
787 }
788 if (res == one) {
789 /* Fast path: just check arguments.
790 It is okay to use identity comparison here. */
791 Py_DECREF(x);
792 continue;
793 }
794 Py_SETREF(res, _PyLong_GCD(res, x));
795 Py_DECREF(x);
796 if (res == NULL) {
797 return NULL;
798 }
799 }
800 return res;
801 }
802
803 PyDoc_STRVAR(math_gcd_doc,
804 "gcd($module, *integers)\n"
805 "--\n"
806 "\n"
807 "Greatest Common Divisor.");
808
809
810 static PyObject *
long_lcm(PyObject * a,PyObject * b)811 long_lcm(PyObject *a, PyObject *b)
812 {
813 PyObject *g, *m, *f, *ab;
814
815 if (_PyLong_IsZero((PyLongObject *)a) || _PyLong_IsZero((PyLongObject *)b)) {
816 return PyLong_FromLong(0);
817 }
818 g = _PyLong_GCD(a, b);
819 if (g == NULL) {
820 return NULL;
821 }
822 f = PyNumber_FloorDivide(a, g);
823 Py_DECREF(g);
824 if (f == NULL) {
825 return NULL;
826 }
827 m = PyNumber_Multiply(f, b);
828 Py_DECREF(f);
829 if (m == NULL) {
830 return NULL;
831 }
832 ab = PyNumber_Absolute(m);
833 Py_DECREF(m);
834 return ab;
835 }
836
837
838 static PyObject *
math_lcm(PyObject * module,PyObject * const * args,Py_ssize_t nargs)839 math_lcm(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
840 {
841 PyObject *res, *x;
842 Py_ssize_t i;
843
844 if (nargs == 0) {
845 return PyLong_FromLong(1);
846 }
847 res = PyNumber_Index(args[0]);
848 if (res == NULL) {
849 return NULL;
850 }
851 if (nargs == 1) {
852 Py_SETREF(res, PyNumber_Absolute(res));
853 return res;
854 }
855
856 PyObject *zero = _PyLong_GetZero(); // borrowed ref
857 for (i = 1; i < nargs; i++) {
858 x = PyNumber_Index(args[i]);
859 if (x == NULL) {
860 Py_DECREF(res);
861 return NULL;
862 }
863 if (res == zero) {
864 /* Fast path: just check arguments.
865 It is okay to use identity comparison here. */
866 Py_DECREF(x);
867 continue;
868 }
869 Py_SETREF(res, long_lcm(res, x));
870 Py_DECREF(x);
871 if (res == NULL) {
872 return NULL;
873 }
874 }
875 return res;
876 }
877
878
879 PyDoc_STRVAR(math_lcm_doc,
880 "lcm($module, *integers)\n"
881 "--\n"
882 "\n"
883 "Least Common Multiple.");
884
885
886 /* Call is_error when errno != 0, and where x is the result libm
887 * returned. is_error will usually set up an exception and return
888 * true (1), but may return false (0) without setting up an exception.
889 */
890 static int
is_error(double x)891 is_error(double x)
892 {
893 int result = 1; /* presumption of guilt */
894 assert(errno); /* non-zero errno is a precondition for calling */
895 if (errno == EDOM)
896 PyErr_SetString(PyExc_ValueError, "math domain error");
897
898 else if (errno == ERANGE) {
899 /* ANSI C generally requires libm functions to set ERANGE
900 * on overflow, but also generally *allows* them to set
901 * ERANGE on underflow too. There's no consistency about
902 * the latter across platforms.
903 * Alas, C99 never requires that errno be set.
904 * Here we suppress the underflow errors (libm functions
905 * should return a zero on underflow, and +- HUGE_VAL on
906 * overflow, so testing the result for zero suffices to
907 * distinguish the cases).
908 *
909 * On some platforms (Ubuntu/ia64) it seems that errno can be
910 * set to ERANGE for subnormal results that do *not* underflow
911 * to zero. So to be safe, we'll ignore ERANGE whenever the
912 * function result is less than 1.5 in absolute value.
913 *
914 * bpo-46018: Changed to 1.5 to ensure underflows in expm1()
915 * are correctly detected, since the function may underflow
916 * toward -1.0 rather than 0.0.
917 */
918 if (fabs(x) < 1.5)
919 result = 0;
920 else
921 PyErr_SetString(PyExc_OverflowError,
922 "math range error");
923 }
924 else
925 /* Unexpected math error */
926 PyErr_SetFromErrno(PyExc_ValueError);
927 return result;
928 }
929
930 /*
931 math_1 is used to wrap a libm function f that takes a double
932 argument and returns a double.
933
934 The error reporting follows these rules, which are designed to do
935 the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
936 platforms.
937
938 - a NaN result from non-NaN inputs causes ValueError to be raised
939 - an infinite result from finite inputs causes OverflowError to be
940 raised if can_overflow is 1, or raises ValueError if can_overflow
941 is 0.
942 - if the result is finite and errno == EDOM then ValueError is
943 raised
944 - if the result is finite and nonzero and errno == ERANGE then
945 OverflowError is raised
946
947 The last rule is used to catch overflow on platforms which follow
948 C89 but for which HUGE_VAL is not an infinity.
949
950 For the majority of one-argument functions these rules are enough
951 to ensure that Python's functions behave as specified in 'Annex F'
952 of the C99 standard, with the 'invalid' and 'divide-by-zero'
953 floating-point exceptions mapping to Python's ValueError and the
954 'overflow' floating-point exception mapping to OverflowError.
955 math_1 only works for functions that don't have singularities *and*
956 the possibility of overflow; fortunately, that covers everything we
957 care about right now.
958 */
959
960 static PyObject *
math_1(PyObject * arg,double (* func)(double),int can_overflow)961 math_1(PyObject *arg, double (*func) (double), int can_overflow)
962 {
963 double x, r;
964 x = PyFloat_AsDouble(arg);
965 if (x == -1.0 && PyErr_Occurred())
966 return NULL;
967 errno = 0;
968 r = (*func)(x);
969 if (Py_IS_NAN(r) && !Py_IS_NAN(x)) {
970 PyErr_SetString(PyExc_ValueError,
971 "math domain error"); /* invalid arg */
972 return NULL;
973 }
974 if (Py_IS_INFINITY(r) && Py_IS_FINITE(x)) {
975 if (can_overflow)
976 PyErr_SetString(PyExc_OverflowError,
977 "math range error"); /* overflow */
978 else
979 PyErr_SetString(PyExc_ValueError,
980 "math domain error"); /* singularity */
981 return NULL;
982 }
983 if (Py_IS_FINITE(r) && errno && is_error(r))
984 /* this branch unnecessary on most platforms */
985 return NULL;
986
987 return PyFloat_FromDouble(r);
988 }
989
990 /* variant of math_1, to be used when the function being wrapped is known to
991 set errno properly (that is, errno = EDOM for invalid or divide-by-zero,
992 errno = ERANGE for overflow). */
993
994 static PyObject *
math_1a(PyObject * arg,double (* func)(double))995 math_1a(PyObject *arg, double (*func) (double))
996 {
997 double x, r;
998 x = PyFloat_AsDouble(arg);
999 if (x == -1.0 && PyErr_Occurred())
1000 return NULL;
1001 errno = 0;
1002 r = (*func)(x);
1003 if (errno && is_error(r))
1004 return NULL;
1005 return PyFloat_FromDouble(r);
1006 }
1007
1008 /*
1009 math_2 is used to wrap a libm function f that takes two double
1010 arguments and returns a double.
1011
1012 The error reporting follows these rules, which are designed to do
1013 the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
1014 platforms.
1015
1016 - a NaN result from non-NaN inputs causes ValueError to be raised
1017 - an infinite result from finite inputs causes OverflowError to be
1018 raised.
1019 - if the result is finite and errno == EDOM then ValueError is
1020 raised
1021 - if the result is finite and nonzero and errno == ERANGE then
1022 OverflowError is raised
1023
1024 The last rule is used to catch overflow on platforms which follow
1025 C89 but for which HUGE_VAL is not an infinity.
1026
1027 For most two-argument functions (copysign, fmod, hypot, atan2)
1028 these rules are enough to ensure that Python's functions behave as
1029 specified in 'Annex F' of the C99 standard, with the 'invalid' and
1030 'divide-by-zero' floating-point exceptions mapping to Python's
1031 ValueError and the 'overflow' floating-point exception mapping to
1032 OverflowError.
1033 */
1034
1035 static PyObject *
math_2(PyObject * const * args,Py_ssize_t nargs,double (* func)(double,double),const char * funcname)1036 math_2(PyObject *const *args, Py_ssize_t nargs,
1037 double (*func) (double, double), const char *funcname)
1038 {
1039 double x, y, r;
1040 if (!_PyArg_CheckPositional(funcname, nargs, 2, 2))
1041 return NULL;
1042 x = PyFloat_AsDouble(args[0]);
1043 if (x == -1.0 && PyErr_Occurred()) {
1044 return NULL;
1045 }
1046 y = PyFloat_AsDouble(args[1]);
1047 if (y == -1.0 && PyErr_Occurred()) {
1048 return NULL;
1049 }
1050 errno = 0;
1051 r = (*func)(x, y);
1052 if (Py_IS_NAN(r)) {
1053 if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
1054 errno = EDOM;
1055 else
1056 errno = 0;
1057 }
1058 else if (Py_IS_INFINITY(r)) {
1059 if (Py_IS_FINITE(x) && Py_IS_FINITE(y))
1060 errno = ERANGE;
1061 else
1062 errno = 0;
1063 }
1064 if (errno && is_error(r))
1065 return NULL;
1066 else
1067 return PyFloat_FromDouble(r);
1068 }
1069
1070 #define FUNC1(funcname, func, can_overflow, docstring) \
1071 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1072 return math_1(args, func, can_overflow); \
1073 }\
1074 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1075
1076 #define FUNC1A(funcname, func, docstring) \
1077 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1078 return math_1a(args, func); \
1079 }\
1080 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1081
1082 #define FUNC2(funcname, func, docstring) \
1083 static PyObject * math_##funcname(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { \
1084 return math_2(args, nargs, func, #funcname); \
1085 }\
1086 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1087
1088 FUNC1(acos, acos, 0,
1089 "acos($module, x, /)\n--\n\n"
1090 "Return the arc cosine (measured in radians) of x.\n\n"
1091 "The result is between 0 and pi.")
1092 FUNC1(acosh, acosh, 0,
1093 "acosh($module, x, /)\n--\n\n"
1094 "Return the inverse hyperbolic cosine of x.")
1095 FUNC1(asin, asin, 0,
1096 "asin($module, x, /)\n--\n\n"
1097 "Return the arc sine (measured in radians) of x.\n\n"
1098 "The result is between -pi/2 and pi/2.")
1099 FUNC1(asinh, asinh, 0,
1100 "asinh($module, x, /)\n--\n\n"
1101 "Return the inverse hyperbolic sine of x.")
1102 FUNC1(atan, atan, 0,
1103 "atan($module, x, /)\n--\n\n"
1104 "Return the arc tangent (measured in radians) of x.\n\n"
1105 "The result is between -pi/2 and pi/2.")
1106 FUNC2(atan2, m_atan2,
1107 "atan2($module, y, x, /)\n--\n\n"
1108 "Return the arc tangent (measured in radians) of y/x.\n\n"
1109 "Unlike atan(y/x), the signs of both x and y are considered.")
1110 FUNC1(atanh, atanh, 0,
1111 "atanh($module, x, /)\n--\n\n"
1112 "Return the inverse hyperbolic tangent of x.")
1113 FUNC1(cbrt, cbrt, 0,
1114 "cbrt($module, x, /)\n--\n\n"
1115 "Return the cube root of x.")
1116
1117 /*[clinic input]
1118 math.ceil
1119
1120 x as number: object
1121 /
1122
1123 Return the ceiling of x as an Integral.
1124
1125 This is the smallest integer >= x.
1126 [clinic start generated code]*/
1127
1128 static PyObject *
math_ceil(PyObject * module,PyObject * number)1129 math_ceil(PyObject *module, PyObject *number)
1130 /*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
1131 {
1132 double x;
1133
1134 if (PyFloat_CheckExact(number)) {
1135 x = PyFloat_AS_DOUBLE(number);
1136 }
1137 else {
1138 math_module_state *state = get_math_module_state(module);
1139 PyObject *method = _PyObject_LookupSpecial(number, state->str___ceil__);
1140 if (method != NULL) {
1141 PyObject *result = _PyObject_CallNoArgs(method);
1142 Py_DECREF(method);
1143 return result;
1144 }
1145 if (PyErr_Occurred())
1146 return NULL;
1147 x = PyFloat_AsDouble(number);
1148 if (x == -1.0 && PyErr_Occurred())
1149 return NULL;
1150 }
1151 return PyLong_FromDouble(ceil(x));
1152 }
1153
1154 FUNC2(copysign, copysign,
1155 "copysign($module, x, y, /)\n--\n\n"
1156 "Return a float with the magnitude (absolute value) of x but the sign of y.\n\n"
1157 "On platforms that support signed zeros, copysign(1.0, -0.0)\n"
1158 "returns -1.0.\n")
1159 FUNC1(cos, cos, 0,
1160 "cos($module, x, /)\n--\n\n"
1161 "Return the cosine of x (measured in radians).")
1162 FUNC1(cosh, cosh, 1,
1163 "cosh($module, x, /)\n--\n\n"
1164 "Return the hyperbolic cosine of x.")
1165 FUNC1A(erf, erf,
1166 "erf($module, x, /)\n--\n\n"
1167 "Error function at x.")
1168 FUNC1A(erfc, erfc,
1169 "erfc($module, x, /)\n--\n\n"
1170 "Complementary error function at x.")
1171 FUNC1(exp, exp, 1,
1172 "exp($module, x, /)\n--\n\n"
1173 "Return e raised to the power of x.")
1174 FUNC1(exp2, exp2, 1,
1175 "exp2($module, x, /)\n--\n\n"
1176 "Return 2 raised to the power of x.")
1177 FUNC1(expm1, expm1, 1,
1178 "expm1($module, x, /)\n--\n\n"
1179 "Return exp(x)-1.\n\n"
1180 "This function avoids the loss of precision involved in the direct "
1181 "evaluation of exp(x)-1 for small x.")
1182 FUNC1(fabs, fabs, 0,
1183 "fabs($module, x, /)\n--\n\n"
1184 "Return the absolute value of the float x.")
1185
1186 /*[clinic input]
1187 math.floor
1188
1189 x as number: object
1190 /
1191
1192 Return the floor of x as an Integral.
1193
1194 This is the largest integer <= x.
1195 [clinic start generated code]*/
1196
1197 static PyObject *
math_floor(PyObject * module,PyObject * number)1198 math_floor(PyObject *module, PyObject *number)
1199 /*[clinic end generated code: output=c6a65c4884884b8a input=63af6b5d7ebcc3d6]*/
1200 {
1201 double x;
1202
1203 if (PyFloat_CheckExact(number)) {
1204 x = PyFloat_AS_DOUBLE(number);
1205 }
1206 else {
1207 math_module_state *state = get_math_module_state(module);
1208 PyObject *method = _PyObject_LookupSpecial(number, state->str___floor__);
1209 if (method != NULL) {
1210 PyObject *result = _PyObject_CallNoArgs(method);
1211 Py_DECREF(method);
1212 return result;
1213 }
1214 if (PyErr_Occurred())
1215 return NULL;
1216 x = PyFloat_AsDouble(number);
1217 if (x == -1.0 && PyErr_Occurred())
1218 return NULL;
1219 }
1220 return PyLong_FromDouble(floor(x));
1221 }
1222
1223 FUNC1A(gamma, m_tgamma,
1224 "gamma($module, x, /)\n--\n\n"
1225 "Gamma function at x.")
1226 FUNC1A(lgamma, m_lgamma,
1227 "lgamma($module, x, /)\n--\n\n"
1228 "Natural logarithm of absolute value of Gamma function at x.")
1229 FUNC1(log1p, m_log1p, 0,
1230 "log1p($module, x, /)\n--\n\n"
1231 "Return the natural logarithm of 1+x (base e).\n\n"
1232 "The result is computed in a way which is accurate for x near zero.")
1233 FUNC2(remainder, m_remainder,
1234 "remainder($module, x, y, /)\n--\n\n"
1235 "Difference between x and the closest integer multiple of y.\n\n"
1236 "Return x - n*y where n*y is the closest integer multiple of y.\n"
1237 "In the case where x is exactly halfway between two multiples of\n"
1238 "y, the nearest even value of n is used. The result is always exact.")
1239 FUNC1(sin, sin, 0,
1240 "sin($module, x, /)\n--\n\n"
1241 "Return the sine of x (measured in radians).")
1242 FUNC1(sinh, sinh, 1,
1243 "sinh($module, x, /)\n--\n\n"
1244 "Return the hyperbolic sine of x.")
1245 FUNC1(sqrt, sqrt, 0,
1246 "sqrt($module, x, /)\n--\n\n"
1247 "Return the square root of x.")
1248 FUNC1(tan, tan, 0,
1249 "tan($module, x, /)\n--\n\n"
1250 "Return the tangent of x (measured in radians).")
1251 FUNC1(tanh, tanh, 0,
1252 "tanh($module, x, /)\n--\n\n"
1253 "Return the hyperbolic tangent of x.")
1254
1255 /* Precision summation function as msum() by Raymond Hettinger in
1256 <https://code.activestate.com/recipes/393090-binary-floating-point-summation-accurate-to-full-p/>,
1257 enhanced with the exact partials sum and roundoff from Mark
1258 Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
1259 See those links for more details, proofs and other references.
1260
1261 Note 1: IEEE 754 floating-point semantics with a rounding mode of
1262 roundTiesToEven are assumed.
1263
1264 Note 2: No provision is made for intermediate overflow handling;
1265 therefore, fsum([1e+308, -1e+308, 1e+308]) returns 1e+308 while
1266 fsum([1e+308, 1e+308, -1e+308]) raises an OverflowError due to the
1267 overflow of the first partial sum.
1268
1269 Note 3: The algorithm has two potential sources of fragility. First, C
1270 permits arithmetic operations on `double`s to be performed in an
1271 intermediate format whose range and precision may be greater than those of
1272 `double` (see for example C99 §5.2.4.2.2, paragraph 8). This can happen for
1273 example on machines using the now largely historical x87 FPUs. In this case,
1274 `fsum` can produce incorrect results. If `FLT_EVAL_METHOD` is `0` or `1`, or
1275 `FLT_EVAL_METHOD` is `2` and `long double` is identical to `double`, then we
1276 should be safe from this source of errors. Second, an aggressively
1277 optimizing compiler can re-associate operations so that (for example) the
1278 statement `yr = hi - x;` is treated as `yr = (x + y) - x` and then
1279 re-associated as `yr = y + (x - x)`, giving `y = yr` and `lo = 0.0`. That
1280 re-association would be in violation of the C standard, and should not occur
1281 except possibly in the presence of unsafe optimizations (e.g., -ffast-math,
1282 -fassociative-math). Such optimizations should be avoided for this module.
1283
1284 Note 4: The signature of math.fsum() differs from builtins.sum()
1285 because the start argument doesn't make sense in the context of
1286 accurate summation. Since the partials table is collapsed before
1287 returning a result, sum(seq2, start=sum(seq1)) may not equal the
1288 accurate result returned by sum(itertools.chain(seq1, seq2)).
1289 */
1290
1291 #define NUM_PARTIALS 32 /* initial partials array size, on stack */
1292
1293 /* Extend the partials array p[] by doubling its size. */
1294 static int /* non-zero on error */
_fsum_realloc(double ** p_ptr,Py_ssize_t n,double * ps,Py_ssize_t * m_ptr)1295 _fsum_realloc(double **p_ptr, Py_ssize_t n,
1296 double *ps, Py_ssize_t *m_ptr)
1297 {
1298 void *v = NULL;
1299 Py_ssize_t m = *m_ptr;
1300
1301 m += m; /* double */
1302 if (n < m && (size_t)m < ((size_t)PY_SSIZE_T_MAX / sizeof(double))) {
1303 double *p = *p_ptr;
1304 if (p == ps) {
1305 v = PyMem_Malloc(sizeof(double) * m);
1306 if (v != NULL)
1307 memcpy(v, ps, sizeof(double) * n);
1308 }
1309 else
1310 v = PyMem_Realloc(p, sizeof(double) * m);
1311 }
1312 if (v == NULL) { /* size overflow or no memory */
1313 PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
1314 return 1;
1315 }
1316 *p_ptr = (double*) v;
1317 *m_ptr = m;
1318 return 0;
1319 }
1320
1321 /* Full precision summation of a sequence of floats.
1322
1323 def msum(iterable):
1324 partials = [] # sorted, non-overlapping partial sums
1325 for x in iterable:
1326 i = 0
1327 for y in partials:
1328 if abs(x) < abs(y):
1329 x, y = y, x
1330 hi = x + y
1331 lo = y - (hi - x)
1332 if lo:
1333 partials[i] = lo
1334 i += 1
1335 x = hi
1336 partials[i:] = [x]
1337 return sum_exact(partials)
1338
1339 Rounded x+y stored in hi with the roundoff stored in lo. Together hi+lo
1340 are exactly equal to x+y. The inner loop applies hi/lo summation to each
1341 partial so that the list of partial sums remains exact.
1342
1343 Sum_exact() adds the partial sums exactly and correctly rounds the final
1344 result (using the round-half-to-even rule). The items in partials remain
1345 non-zero, non-special, non-overlapping and strictly increasing in
1346 magnitude, but possibly not all having the same sign.
1347
1348 Depends on IEEE 754 arithmetic guarantees and half-even rounding.
1349 */
1350
1351 /*[clinic input]
1352 math.fsum
1353
1354 seq: object
1355 /
1356
1357 Return an accurate floating-point sum of values in the iterable seq.
1358
1359 Assumes IEEE-754 floating-point arithmetic.
1360 [clinic start generated code]*/
1361
1362 static PyObject *
math_fsum(PyObject * module,PyObject * seq)1363 math_fsum(PyObject *module, PyObject *seq)
1364 /*[clinic end generated code: output=ba5c672b87fe34fc input=4506244ded6057dc]*/
1365 {
1366 PyObject *item, *iter, *sum = NULL;
1367 Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
1368 double x, y, t, ps[NUM_PARTIALS], *p = ps;
1369 double xsave, special_sum = 0.0, inf_sum = 0.0;
1370 double hi, yr, lo = 0.0;
1371
1372 iter = PyObject_GetIter(seq);
1373 if (iter == NULL)
1374 return NULL;
1375
1376 for(;;) { /* for x in iterable */
1377 assert(0 <= n && n <= m);
1378 assert((m == NUM_PARTIALS && p == ps) ||
1379 (m > NUM_PARTIALS && p != NULL));
1380
1381 item = PyIter_Next(iter);
1382 if (item == NULL) {
1383 if (PyErr_Occurred())
1384 goto _fsum_error;
1385 break;
1386 }
1387 ASSIGN_DOUBLE(x, item, error_with_item);
1388 Py_DECREF(item);
1389
1390 xsave = x;
1391 for (i = j = 0; j < n; j++) { /* for y in partials */
1392 y = p[j];
1393 if (fabs(x) < fabs(y)) {
1394 t = x; x = y; y = t;
1395 }
1396 hi = x + y;
1397 yr = hi - x;
1398 lo = y - yr;
1399 if (lo != 0.0)
1400 p[i++] = lo;
1401 x = hi;
1402 }
1403
1404 n = i; /* ps[i:] = [x] */
1405 if (x != 0.0) {
1406 if (! Py_IS_FINITE(x)) {
1407 /* a nonfinite x could arise either as
1408 a result of intermediate overflow, or
1409 as a result of a nan or inf in the
1410 summands */
1411 if (Py_IS_FINITE(xsave)) {
1412 PyErr_SetString(PyExc_OverflowError,
1413 "intermediate overflow in fsum");
1414 goto _fsum_error;
1415 }
1416 if (Py_IS_INFINITY(xsave))
1417 inf_sum += xsave;
1418 special_sum += xsave;
1419 /* reset partials */
1420 n = 0;
1421 }
1422 else if (n >= m && _fsum_realloc(&p, n, ps, &m))
1423 goto _fsum_error;
1424 else
1425 p[n++] = x;
1426 }
1427 }
1428
1429 if (special_sum != 0.0) {
1430 if (Py_IS_NAN(inf_sum))
1431 PyErr_SetString(PyExc_ValueError,
1432 "-inf + inf in fsum");
1433 else
1434 sum = PyFloat_FromDouble(special_sum);
1435 goto _fsum_error;
1436 }
1437
1438 hi = 0.0;
1439 if (n > 0) {
1440 hi = p[--n];
1441 /* sum_exact(ps, hi) from the top, stop when the sum becomes
1442 inexact. */
1443 while (n > 0) {
1444 x = hi;
1445 y = p[--n];
1446 assert(fabs(y) < fabs(x));
1447 hi = x + y;
1448 yr = hi - x;
1449 lo = y - yr;
1450 if (lo != 0.0)
1451 break;
1452 }
1453 /* Make half-even rounding work across multiple partials.
1454 Needed so that sum([1e-16, 1, 1e16]) will round-up the last
1455 digit to two instead of down to zero (the 1e-16 makes the 1
1456 slightly closer to two). With a potential 1 ULP rounding
1457 error fixed-up, math.fsum() can guarantee commutativity. */
1458 if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
1459 (lo > 0.0 && p[n-1] > 0.0))) {
1460 y = lo * 2.0;
1461 x = hi + y;
1462 yr = x - hi;
1463 if (y == yr)
1464 hi = x;
1465 }
1466 }
1467 sum = PyFloat_FromDouble(hi);
1468
1469 _fsum_error:
1470 Py_DECREF(iter);
1471 if (p != ps)
1472 PyMem_Free(p);
1473 return sum;
1474
1475 error_with_item:
1476 Py_DECREF(item);
1477 goto _fsum_error;
1478 }
1479
1480 #undef NUM_PARTIALS
1481
1482
1483 static unsigned long
count_set_bits(unsigned long n)1484 count_set_bits(unsigned long n)
1485 {
1486 unsigned long count = 0;
1487 while (n != 0) {
1488 ++count;
1489 n &= n - 1; /* clear least significant bit */
1490 }
1491 return count;
1492 }
1493
1494 /* Integer square root
1495
1496 Given a nonnegative integer `n`, we want to compute the largest integer
1497 `a` for which `a * a <= n`, or equivalently the integer part of the exact
1498 square root of `n`.
1499
1500 We use an adaptive-precision pure-integer version of Newton's iteration. Given
1501 a positive integer `n`, the algorithm produces at each iteration an integer
1502 approximation `a` to the square root of `n >> s` for some even integer `s`,
1503 with `s` decreasing as the iterations progress. On the final iteration, `s` is
1504 zero and we have an approximation to the square root of `n` itself.
1505
1506 At every step, the approximation `a` is strictly within 1.0 of the true square
1507 root, so we have
1508
1509 (a - 1)**2 < (n >> s) < (a + 1)**2
1510
1511 After the final iteration, a check-and-correct step is needed to determine
1512 whether `a` or `a - 1` gives the desired integer square root of `n`.
1513
1514 The algorithm is remarkable in its simplicity. There's no need for a
1515 per-iteration check-and-correct step, and termination is straightforward: the
1516 number of iterations is known in advance (it's exactly `floor(log2(log2(n)))`
1517 for `n > 1`). The only tricky part of the correctness proof is in establishing
1518 that the bound `(a - 1)**2 < (n >> s) < (a + 1)**2` is maintained from one
1519 iteration to the next. A sketch of the proof of this is given below.
1520
1521 In addition to the proof sketch, a formal, computer-verified proof
1522 of correctness (using Lean) of an equivalent recursive algorithm can be found
1523 here:
1524
1525 https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean
1526
1527
1528 Here's Python code equivalent to the C implementation below:
1529
1530 def isqrt(n):
1531 """
1532 Return the integer part of the square root of the input.
1533 """
1534 n = operator.index(n)
1535
1536 if n < 0:
1537 raise ValueError("isqrt() argument must be nonnegative")
1538 if n == 0:
1539 return 0
1540
1541 c = (n.bit_length() - 1) // 2
1542 a = 1
1543 d = 0
1544 for s in reversed(range(c.bit_length())):
1545 # Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2
1546 e = d
1547 d = c >> s
1548 a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1549
1550 return a - (a*a > n)
1551
1552
1553 Sketch of proof of correctness
1554 ------------------------------
1555
1556 The delicate part of the correctness proof is showing that the loop invariant
1557 is preserved from one iteration to the next. That is, just before the line
1558
1559 a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1560
1561 is executed in the above code, we know that
1562
1563 (1) (a - 1)**2 < (n >> 2*(c - e)) < (a + 1)**2.
1564
1565 (since `e` is always the value of `d` from the previous iteration). We must
1566 prove that after that line is executed, we have
1567
1568 (a - 1)**2 < (n >> 2*(c - d)) < (a + 1)**2
1569
1570 To facilitate the proof, we make some changes of notation. Write `m` for
1571 `n >> 2*(c-d)`, and write `b` for the new value of `a`, so
1572
1573 b = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1574
1575 or equivalently:
1576
1577 (2) b = (a << d - e - 1) + (m >> d - e + 1) // a
1578
1579 Then we can rewrite (1) as:
1580
1581 (3) (a - 1)**2 < (m >> 2*(d - e)) < (a + 1)**2
1582
1583 and we must show that (b - 1)**2 < m < (b + 1)**2.
1584
1585 From this point on, we switch to mathematical notation, so `/` means exact
1586 division rather than integer division and `^` is used for exponentiation. We
1587 use the `√` symbol for the exact square root. In (3), we can remove the
1588 implicit floor operation to give:
1589
1590 (4) (a - 1)^2 < m / 4^(d - e) < (a + 1)^2
1591
1592 Taking square roots throughout (4), scaling by `2^(d-e)`, and rearranging gives
1593
1594 (5) 0 <= | 2^(d-e)a - √m | < 2^(d-e)
1595
1596 Squaring and dividing through by `2^(d-e+1) a` gives
1597
1598 (6) 0 <= 2^(d-e-1) a + m / (2^(d-e+1) a) - √m < 2^(d-e-1) / a
1599
1600 We'll show below that `2^(d-e-1) <= a`. Given that, we can replace the
1601 right-hand side of (6) with `1`, and now replacing the central
1602 term `m / (2^(d-e+1) a)` with its floor in (6) gives
1603
1604 (7) -1 < 2^(d-e-1) a + m // 2^(d-e+1) a - √m < 1
1605
1606 Or equivalently, from (2):
1607
1608 (7) -1 < b - √m < 1
1609
1610 and rearranging gives that `(b-1)^2 < m < (b+1)^2`, which is what we needed
1611 to prove.
1612
1613 We're not quite done: we still have to prove the inequality `2^(d - e - 1) <=
1614 a` that was used to get line (7) above. From the definition of `c`, we have
1615 `4^c <= n`, which implies
1616
1617 (8) 4^d <= m
1618
1619 also, since `e == d >> 1`, `d` is at most `2e + 1`, from which it follows
1620 that `2d - 2e - 1 <= d` and hence that
1621
1622 (9) 4^(2d - 2e - 1) <= m
1623
1624 Dividing both sides by `4^(d - e)` gives
1625
1626 (10) 4^(d - e - 1) <= m / 4^(d - e)
1627
1628 But we know from (4) that `m / 4^(d-e) < (a + 1)^2`, hence
1629
1630 (11) 4^(d - e - 1) < (a + 1)^2
1631
1632 Now taking square roots of both sides and observing that both `2^(d-e-1)` and
1633 `a` are integers gives `2^(d - e - 1) <= a`, which is what we needed. This
1634 completes the proof sketch.
1635
1636 */
1637
1638 /*
1639 The _approximate_isqrt_tab table provides approximate square roots for
1640 16-bit integers. For any n in the range 2**14 <= n < 2**16, the value
1641
1642 a = _approximate_isqrt_tab[(n >> 8) - 64]
1643
1644 is an approximate square root of n, satisfying (a - 1)**2 < n < (a + 1)**2.
1645
1646 The table was computed in Python using the expression:
1647
1648 [min(round(sqrt(256*n + 128)), 255) for n in range(64, 256)]
1649 */
1650
1651 static const uint8_t _approximate_isqrt_tab[192] = {
1652 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
1653 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150,
1654 151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160,
1655 160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169,
1656 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178,
1657 179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186,
1658 187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194,
1659 195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202,
1660 203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210,
1661 210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217,
1662 217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224,
1663 224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230,
1664 231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237,
1665 238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243,
1666 244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250,
1667 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255,
1668 };
1669
1670 /* Approximate square root of a large 64-bit integer.
1671
1672 Given `n` satisfying `2**62 <= n < 2**64`, return `a`
1673 satisfying `(a - 1)**2 < n < (a + 1)**2`. */
1674
1675 static inline uint32_t
_approximate_isqrt(uint64_t n)1676 _approximate_isqrt(uint64_t n)
1677 {
1678 uint32_t u = _approximate_isqrt_tab[(n >> 56) - 64];
1679 u = (u << 7) + (uint32_t)(n >> 41) / u;
1680 return (u << 15) + (uint32_t)((n >> 17) / u);
1681 }
1682
1683 /*[clinic input]
1684 math.isqrt
1685
1686 n: object
1687 /
1688
1689 Return the integer part of the square root of the input.
1690 [clinic start generated code]*/
1691
1692 static PyObject *
math_isqrt(PyObject * module,PyObject * n)1693 math_isqrt(PyObject *module, PyObject *n)
1694 /*[clinic end generated code: output=35a6f7f980beab26 input=5b6e7ae4fa6c43d6]*/
1695 {
1696 int a_too_large, c_bit_length;
1697 size_t c, d;
1698 uint64_t m;
1699 uint32_t u;
1700 PyObject *a = NULL, *b;
1701
1702 n = _PyNumber_Index(n);
1703 if (n == NULL) {
1704 return NULL;
1705 }
1706
1707 if (_PyLong_IsNegative((PyLongObject *)n)) {
1708 PyErr_SetString(
1709 PyExc_ValueError,
1710 "isqrt() argument must be nonnegative");
1711 goto error;
1712 }
1713 if (_PyLong_IsZero((PyLongObject *)n)) {
1714 Py_DECREF(n);
1715 return PyLong_FromLong(0);
1716 }
1717
1718 /* c = (n.bit_length() - 1) // 2 */
1719 c = _PyLong_NumBits(n);
1720 if (c == (size_t)(-1)) {
1721 goto error;
1722 }
1723 c = (c - 1U) / 2U;
1724
1725 /* Fast path: if c <= 31 then n < 2**64 and we can compute directly with a
1726 fast, almost branch-free algorithm. */
1727 if (c <= 31U) {
1728 int shift = 31 - (int)c;
1729 m = (uint64_t)PyLong_AsUnsignedLongLong(n);
1730 Py_DECREF(n);
1731 if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1732 return NULL;
1733 }
1734 u = _approximate_isqrt(m << 2*shift) >> shift;
1735 u -= (uint64_t)u * u > m;
1736 return PyLong_FromUnsignedLong(u);
1737 }
1738
1739 /* Slow path: n >= 2**64. We perform the first five iterations in C integer
1740 arithmetic, then switch to using Python long integers. */
1741
1742 /* From n >= 2**64 it follows that c.bit_length() >= 6. */
1743 c_bit_length = 6;
1744 while ((c >> c_bit_length) > 0U) {
1745 ++c_bit_length;
1746 }
1747
1748 /* Initialise d and a. */
1749 d = c >> (c_bit_length - 5);
1750 b = _PyLong_Rshift(n, 2U*c - 62U);
1751 if (b == NULL) {
1752 goto error;
1753 }
1754 m = (uint64_t)PyLong_AsUnsignedLongLong(b);
1755 Py_DECREF(b);
1756 if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1757 goto error;
1758 }
1759 u = _approximate_isqrt(m) >> (31U - d);
1760 a = PyLong_FromUnsignedLong(u);
1761 if (a == NULL) {
1762 goto error;
1763 }
1764
1765 for (int s = c_bit_length - 6; s >= 0; --s) {
1766 PyObject *q;
1767 size_t e = d;
1768
1769 d = c >> s;
1770
1771 /* q = (n >> 2*c - e - d + 1) // a */
1772 q = _PyLong_Rshift(n, 2U*c - d - e + 1U);
1773 if (q == NULL) {
1774 goto error;
1775 }
1776 Py_SETREF(q, PyNumber_FloorDivide(q, a));
1777 if (q == NULL) {
1778 goto error;
1779 }
1780
1781 /* a = (a << d - 1 - e) + q */
1782 Py_SETREF(a, _PyLong_Lshift(a, d - 1U - e));
1783 if (a == NULL) {
1784 Py_DECREF(q);
1785 goto error;
1786 }
1787 Py_SETREF(a, PyNumber_Add(a, q));
1788 Py_DECREF(q);
1789 if (a == NULL) {
1790 goto error;
1791 }
1792 }
1793
1794 /* The correct result is either a or a - 1. Figure out which, and
1795 decrement a if necessary. */
1796
1797 /* a_too_large = n < a * a */
1798 b = PyNumber_Multiply(a, a);
1799 if (b == NULL) {
1800 goto error;
1801 }
1802 a_too_large = PyObject_RichCompareBool(n, b, Py_LT);
1803 Py_DECREF(b);
1804 if (a_too_large == -1) {
1805 goto error;
1806 }
1807
1808 if (a_too_large) {
1809 Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne()));
1810 }
1811 Py_DECREF(n);
1812 return a;
1813
1814 error:
1815 Py_XDECREF(a);
1816 Py_DECREF(n);
1817 return NULL;
1818 }
1819
1820 /* Divide-and-conquer factorial algorithm
1821 *
1822 * Based on the formula and pseudo-code provided at:
1823 * http://www.luschny.de/math/factorial/binarysplitfact.html
1824 *
1825 * Faster algorithms exist, but they're more complicated and depend on
1826 * a fast prime factorization algorithm.
1827 *
1828 * Notes on the algorithm
1829 * ----------------------
1830 *
1831 * factorial(n) is written in the form 2**k * m, with m odd. k and m are
1832 * computed separately, and then combined using a left shift.
1833 *
1834 * The function factorial_odd_part computes the odd part m (i.e., the greatest
1835 * odd divisor) of factorial(n), using the formula:
1836 *
1837 * factorial_odd_part(n) =
1838 *
1839 * product_{i >= 0} product_{0 < j <= n / 2**i, j odd} j
1840 *
1841 * Example: factorial_odd_part(20) =
1842 *
1843 * (1) *
1844 * (1) *
1845 * (1 * 3 * 5) *
1846 * (1 * 3 * 5 * 7 * 9) *
1847 * (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1848 *
1849 * Here i goes from large to small: the first term corresponds to i=4 (any
1850 * larger i gives an empty product), and the last term corresponds to i=0.
1851 * Each term can be computed from the last by multiplying by the extra odd
1852 * numbers required: e.g., to get from the penultimate term to the last one,
1853 * we multiply by (11 * 13 * 15 * 17 * 19).
1854 *
1855 * To see a hint of why this formula works, here are the same numbers as above
1856 * but with the even parts (i.e., the appropriate powers of 2) included. For
1857 * each subterm in the product for i, we multiply that subterm by 2**i:
1858 *
1859 * factorial(20) =
1860 *
1861 * (16) *
1862 * (8) *
1863 * (4 * 12 * 20) *
1864 * (2 * 6 * 10 * 14 * 18) *
1865 * (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1866 *
1867 * The factorial_partial_product function computes the product of all odd j in
1868 * range(start, stop) for given start and stop. It's used to compute the
1869 * partial products like (11 * 13 * 15 * 17 * 19) in the example above. It
1870 * operates recursively, repeatedly splitting the range into two roughly equal
1871 * pieces until the subranges are small enough to be computed using only C
1872 * integer arithmetic.
1873 *
1874 * The two-valuation k (i.e., the exponent of the largest power of 2 dividing
1875 * the factorial) is computed independently in the main math_factorial
1876 * function. By standard results, its value is:
1877 *
1878 * two_valuation = n//2 + n//4 + n//8 + ....
1879 *
1880 * It can be shown (e.g., by complete induction on n) that two_valuation is
1881 * equal to n - count_set_bits(n), where count_set_bits(n) gives the number of
1882 * '1'-bits in the binary expansion of n.
1883 */
1884
1885 /* factorial_partial_product: Compute product(range(start, stop, 2)) using
1886 * divide and conquer. Assumes start and stop are odd and stop > start.
1887 * max_bits must be >= bit_length(stop - 2). */
1888
1889 static PyObject *
factorial_partial_product(unsigned long start,unsigned long stop,unsigned long max_bits)1890 factorial_partial_product(unsigned long start, unsigned long stop,
1891 unsigned long max_bits)
1892 {
1893 unsigned long midpoint, num_operands;
1894 PyObject *left = NULL, *right = NULL, *result = NULL;
1895
1896 /* If the return value will fit an unsigned long, then we can
1897 * multiply in a tight, fast loop where each multiply is O(1).
1898 * Compute an upper bound on the number of bits required to store
1899 * the answer.
1900 *
1901 * Storing some integer z requires floor(lg(z))+1 bits, which is
1902 * conveniently the value returned by bit_length(z). The
1903 * product x*y will require at most
1904 * bit_length(x) + bit_length(y) bits to store, based
1905 * on the idea that lg product = lg x + lg y.
1906 *
1907 * We know that stop - 2 is the largest number to be multiplied. From
1908 * there, we have: bit_length(answer) <= num_operands *
1909 * bit_length(stop - 2)
1910 */
1911
1912 num_operands = (stop - start) / 2;
1913 /* The "num_operands <= 8 * SIZEOF_LONG" check guards against the
1914 * unlikely case of an overflow in num_operands * max_bits. */
1915 if (num_operands <= 8 * SIZEOF_LONG &&
1916 num_operands * max_bits <= 8 * SIZEOF_LONG) {
1917 unsigned long j, total;
1918 for (total = start, j = start + 2; j < stop; j += 2)
1919 total *= j;
1920 return PyLong_FromUnsignedLong(total);
1921 }
1922
1923 /* find midpoint of range(start, stop), rounded up to next odd number. */
1924 midpoint = (start + num_operands) | 1;
1925 left = factorial_partial_product(start, midpoint,
1926 _Py_bit_length(midpoint - 2));
1927 if (left == NULL)
1928 goto error;
1929 right = factorial_partial_product(midpoint, stop, max_bits);
1930 if (right == NULL)
1931 goto error;
1932 result = PyNumber_Multiply(left, right);
1933
1934 error:
1935 Py_XDECREF(left);
1936 Py_XDECREF(right);
1937 return result;
1938 }
1939
1940 /* factorial_odd_part: compute the odd part of factorial(n). */
1941
1942 static PyObject *
factorial_odd_part(unsigned long n)1943 factorial_odd_part(unsigned long n)
1944 {
1945 long i;
1946 unsigned long v, lower, upper;
1947 PyObject *partial, *tmp, *inner, *outer;
1948
1949 inner = PyLong_FromLong(1);
1950 if (inner == NULL)
1951 return NULL;
1952 outer = Py_NewRef(inner);
1953
1954 upper = 3;
1955 for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
1956 v = n >> i;
1957 if (v <= 2)
1958 continue;
1959 lower = upper;
1960 /* (v + 1) | 1 = least odd integer strictly larger than n / 2**i */
1961 upper = (v + 1) | 1;
1962 /* Here inner is the product of all odd integers j in the range (0,
1963 n/2**(i+1)]. The factorial_partial_product call below gives the
1964 product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
1965 partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
1966 /* inner *= partial */
1967 if (partial == NULL)
1968 goto error;
1969 tmp = PyNumber_Multiply(inner, partial);
1970 Py_DECREF(partial);
1971 if (tmp == NULL)
1972 goto error;
1973 Py_SETREF(inner, tmp);
1974 /* Now inner is the product of all odd integers j in the range (0,
1975 n/2**i], giving the inner product in the formula above. */
1976
1977 /* outer *= inner; */
1978 tmp = PyNumber_Multiply(outer, inner);
1979 if (tmp == NULL)
1980 goto error;
1981 Py_SETREF(outer, tmp);
1982 }
1983 Py_DECREF(inner);
1984 return outer;
1985
1986 error:
1987 Py_DECREF(outer);
1988 Py_DECREF(inner);
1989 return NULL;
1990 }
1991
1992
1993 /* Lookup table for small factorial values */
1994
1995 static const unsigned long SmallFactorials[] = {
1996 1, 1, 2, 6, 24, 120, 720, 5040, 40320,
1997 362880, 3628800, 39916800, 479001600,
1998 #if SIZEOF_LONG >= 8
1999 6227020800, 87178291200, 1307674368000,
2000 20922789888000, 355687428096000, 6402373705728000,
2001 121645100408832000, 2432902008176640000
2002 #endif
2003 };
2004
2005 /*[clinic input]
2006 math.factorial
2007
2008 n as arg: object
2009 /
2010
2011 Find n!.
2012
2013 Raise a ValueError if x is negative or non-integral.
2014 [clinic start generated code]*/
2015
2016 static PyObject *
math_factorial(PyObject * module,PyObject * arg)2017 math_factorial(PyObject *module, PyObject *arg)
2018 /*[clinic end generated code: output=6686f26fae00e9ca input=713fb771677e8c31]*/
2019 {
2020 long x, two_valuation;
2021 int overflow;
2022 PyObject *result, *odd_part;
2023
2024 x = PyLong_AsLongAndOverflow(arg, &overflow);
2025 if (x == -1 && PyErr_Occurred()) {
2026 return NULL;
2027 }
2028 else if (overflow == 1) {
2029 PyErr_Format(PyExc_OverflowError,
2030 "factorial() argument should not exceed %ld",
2031 LONG_MAX);
2032 return NULL;
2033 }
2034 else if (overflow == -1 || x < 0) {
2035 PyErr_SetString(PyExc_ValueError,
2036 "factorial() not defined for negative values");
2037 return NULL;
2038 }
2039
2040 /* use lookup table if x is small */
2041 if (x < (long)Py_ARRAY_LENGTH(SmallFactorials))
2042 return PyLong_FromUnsignedLong(SmallFactorials[x]);
2043
2044 /* else express in the form odd_part * 2**two_valuation, and compute as
2045 odd_part << two_valuation. */
2046 odd_part = factorial_odd_part(x);
2047 if (odd_part == NULL)
2048 return NULL;
2049 two_valuation = x - count_set_bits(x);
2050 result = _PyLong_Lshift(odd_part, two_valuation);
2051 Py_DECREF(odd_part);
2052 return result;
2053 }
2054
2055
2056 /*[clinic input]
2057 math.trunc
2058
2059 x: object
2060 /
2061
2062 Truncates the Real x to the nearest Integral toward 0.
2063
2064 Uses the __trunc__ magic method.
2065 [clinic start generated code]*/
2066
2067 static PyObject *
math_trunc(PyObject * module,PyObject * x)2068 math_trunc(PyObject *module, PyObject *x)
2069 /*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
2070 {
2071 PyObject *trunc, *result;
2072
2073 if (PyFloat_CheckExact(x)) {
2074 return PyFloat_Type.tp_as_number->nb_int(x);
2075 }
2076
2077 math_module_state *state = get_math_module_state(module);
2078 trunc = _PyObject_LookupSpecial(x, state->str___trunc__);
2079 if (trunc == NULL) {
2080 if (!PyErr_Occurred())
2081 PyErr_Format(PyExc_TypeError,
2082 "type %.100s doesn't define __trunc__ method",
2083 Py_TYPE(x)->tp_name);
2084 return NULL;
2085 }
2086 result = _PyObject_CallNoArgs(trunc);
2087 Py_DECREF(trunc);
2088 return result;
2089 }
2090
2091
2092 /*[clinic input]
2093 math.frexp
2094
2095 x: double
2096 /
2097
2098 Return the mantissa and exponent of x, as pair (m, e).
2099
2100 m is a float and e is an int, such that x = m * 2.**e.
2101 If x is 0, m and e are both 0. Else 0.5 <= abs(m) < 1.0.
2102 [clinic start generated code]*/
2103
2104 static PyObject *
math_frexp_impl(PyObject * module,double x)2105 math_frexp_impl(PyObject *module, double x)
2106 /*[clinic end generated code: output=03e30d252a15ad4a input=96251c9e208bc6e9]*/
2107 {
2108 int i;
2109 /* deal with special cases directly, to sidestep platform
2110 differences */
2111 if (Py_IS_NAN(x) || Py_IS_INFINITY(x) || !x) {
2112 i = 0;
2113 }
2114 else {
2115 x = frexp(x, &i);
2116 }
2117 return Py_BuildValue("(di)", x, i);
2118 }
2119
2120
2121 /*[clinic input]
2122 math.ldexp
2123
2124 x: double
2125 i: object
2126 /
2127
2128 Return x * (2**i).
2129
2130 This is essentially the inverse of frexp().
2131 [clinic start generated code]*/
2132
2133 static PyObject *
math_ldexp_impl(PyObject * module,double x,PyObject * i)2134 math_ldexp_impl(PyObject *module, double x, PyObject *i)
2135 /*[clinic end generated code: output=b6892f3c2df9cc6a input=17d5970c1a40a8c1]*/
2136 {
2137 double r;
2138 long exp;
2139 int overflow;
2140
2141 if (PyLong_Check(i)) {
2142 /* on overflow, replace exponent with either LONG_MAX
2143 or LONG_MIN, depending on the sign. */
2144 exp = PyLong_AsLongAndOverflow(i, &overflow);
2145 if (exp == -1 && PyErr_Occurred())
2146 return NULL;
2147 if (overflow)
2148 exp = overflow < 0 ? LONG_MIN : LONG_MAX;
2149 }
2150 else {
2151 PyErr_SetString(PyExc_TypeError,
2152 "Expected an int as second argument to ldexp.");
2153 return NULL;
2154 }
2155
2156 if (x == 0. || !Py_IS_FINITE(x)) {
2157 /* NaNs, zeros and infinities are returned unchanged */
2158 r = x;
2159 errno = 0;
2160 } else if (exp > INT_MAX) {
2161 /* overflow */
2162 r = copysign(Py_HUGE_VAL, x);
2163 errno = ERANGE;
2164 } else if (exp < INT_MIN) {
2165 /* underflow to +-0 */
2166 r = copysign(0., x);
2167 errno = 0;
2168 } else {
2169 errno = 0;
2170 r = ldexp(x, (int)exp);
2171 if (Py_IS_INFINITY(r))
2172 errno = ERANGE;
2173 }
2174
2175 if (errno && is_error(r))
2176 return NULL;
2177 return PyFloat_FromDouble(r);
2178 }
2179
2180
2181 /*[clinic input]
2182 math.modf
2183
2184 x: double
2185 /
2186
2187 Return the fractional and integer parts of x.
2188
2189 Both results carry the sign of x and are floats.
2190 [clinic start generated code]*/
2191
2192 static PyObject *
math_modf_impl(PyObject * module,double x)2193 math_modf_impl(PyObject *module, double x)
2194 /*[clinic end generated code: output=90cee0260014c3c0 input=b4cfb6786afd9035]*/
2195 {
2196 double y;
2197 /* some platforms don't do the right thing for NaNs and
2198 infinities, so we take care of special cases directly. */
2199 if (Py_IS_INFINITY(x))
2200 return Py_BuildValue("(dd)", copysign(0., x), x);
2201 else if (Py_IS_NAN(x))
2202 return Py_BuildValue("(dd)", x, x);
2203
2204 errno = 0;
2205 x = modf(x, &y);
2206 return Py_BuildValue("(dd)", x, y);
2207 }
2208
2209
2210 /* A decent logarithm is easy to compute even for huge ints, but libm can't
2211 do that by itself -- loghelper can. func is log or log10, and name is
2212 "log" or "log10". Note that overflow of the result isn't possible: an int
2213 can contain no more than INT_MAX * SHIFT bits, so has value certainly less
2214 than 2**(2**64 * 2**16) == 2**2**80, and log2 of that is 2**80, which is
2215 small enough to fit in an IEEE single. log and log10 are even smaller.
2216 However, intermediate overflow is possible for an int if the number of bits
2217 in that int is larger than PY_SSIZE_T_MAX. */
2218
2219 static PyObject*
loghelper(PyObject * arg,double (* func)(double))2220 loghelper(PyObject* arg, double (*func)(double))
2221 {
2222 /* If it is int, do it ourselves. */
2223 if (PyLong_Check(arg)) {
2224 double x, result;
2225 Py_ssize_t e;
2226
2227 /* Negative or zero inputs give a ValueError. */
2228 if (!_PyLong_IsPositive((PyLongObject *)arg)) {
2229 PyErr_SetString(PyExc_ValueError,
2230 "math domain error");
2231 return NULL;
2232 }
2233
2234 x = PyLong_AsDouble(arg);
2235 if (x == -1.0 && PyErr_Occurred()) {
2236 if (!PyErr_ExceptionMatches(PyExc_OverflowError))
2237 return NULL;
2238 /* Here the conversion to double overflowed, but it's possible
2239 to compute the log anyway. Clear the exception and continue. */
2240 PyErr_Clear();
2241 x = _PyLong_Frexp((PyLongObject *)arg, &e);
2242 if (x == -1.0 && PyErr_Occurred())
2243 return NULL;
2244 /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */
2245 result = func(x) + func(2.0) * e;
2246 }
2247 else
2248 /* Successfully converted x to a double. */
2249 result = func(x);
2250 return PyFloat_FromDouble(result);
2251 }
2252
2253 /* Else let libm handle it by itself. */
2254 return math_1(arg, func, 0);
2255 }
2256
2257
2258 /* AC: cannot convert yet, see gh-102839 and gh-89381, waiting
2259 for support of multiple signatures */
2260 static PyObject *
math_log(PyObject * module,PyObject * const * args,Py_ssize_t nargs)2261 math_log(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
2262 {
2263 PyObject *num, *den;
2264 PyObject *ans;
2265
2266 if (!_PyArg_CheckPositional("log", nargs, 1, 2))
2267 return NULL;
2268
2269 num = loghelper(args[0], m_log);
2270 if (num == NULL || nargs == 1)
2271 return num;
2272
2273 den = loghelper(args[1], m_log);
2274 if (den == NULL) {
2275 Py_DECREF(num);
2276 return NULL;
2277 }
2278
2279 ans = PyNumber_TrueDivide(num, den);
2280 Py_DECREF(num);
2281 Py_DECREF(den);
2282 return ans;
2283 }
2284
2285 PyDoc_STRVAR(math_log_doc,
2286 "log(x, [base=math.e])\n\
2287 Return the logarithm of x to the given base.\n\n\
2288 If the base is not specified, returns the natural logarithm (base e) of x.");
2289
2290 /*[clinic input]
2291 math.log2
2292
2293 x: object
2294 /
2295
2296 Return the base 2 logarithm of x.
2297 [clinic start generated code]*/
2298
2299 static PyObject *
math_log2(PyObject * module,PyObject * x)2300 math_log2(PyObject *module, PyObject *x)
2301 /*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
2302 {
2303 return loghelper(x, m_log2);
2304 }
2305
2306
2307 /*[clinic input]
2308 math.log10
2309
2310 x: object
2311 /
2312
2313 Return the base 10 logarithm of x.
2314 [clinic start generated code]*/
2315
2316 static PyObject *
math_log10(PyObject * module,PyObject * x)2317 math_log10(PyObject *module, PyObject *x)
2318 /*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
2319 {
2320 return loghelper(x, m_log10);
2321 }
2322
2323
2324 /*[clinic input]
2325 math.fma
2326
2327 x: double
2328 y: double
2329 z: double
2330 /
2331
2332 Fused multiply-add operation.
2333
2334 Compute (x * y) + z with a single round.
2335 [clinic start generated code]*/
2336
2337 static PyObject *
math_fma_impl(PyObject * module,double x,double y,double z)2338 math_fma_impl(PyObject *module, double x, double y, double z)
2339 /*[clinic end generated code: output=4fc8626dbc278d17 input=e3ad1f4a4c89626e]*/
2340 {
2341 double r = fma(x, y, z);
2342
2343 /* Fast path: if we got a finite result, we're done. */
2344 if (Py_IS_FINITE(r)) {
2345 return PyFloat_FromDouble(r);
2346 }
2347
2348 /* Non-finite result. Raise an exception if appropriate, else return r. */
2349 if (Py_IS_NAN(r)) {
2350 if (!Py_IS_NAN(x) && !Py_IS_NAN(y) && !Py_IS_NAN(z)) {
2351 /* NaN result from non-NaN inputs. */
2352 PyErr_SetString(PyExc_ValueError, "invalid operation in fma");
2353 return NULL;
2354 }
2355 }
2356 else if (Py_IS_FINITE(x) && Py_IS_FINITE(y) && Py_IS_FINITE(z)) {
2357 /* Infinite result from finite inputs. */
2358 PyErr_SetString(PyExc_OverflowError, "overflow in fma");
2359 return NULL;
2360 }
2361
2362 return PyFloat_FromDouble(r);
2363 }
2364
2365
2366 /*[clinic input]
2367 math.fmod
2368
2369 x: double
2370 y: double
2371 /
2372
2373 Return fmod(x, y), according to platform C.
2374
2375 x % y may differ.
2376 [clinic start generated code]*/
2377
2378 static PyObject *
math_fmod_impl(PyObject * module,double x,double y)2379 math_fmod_impl(PyObject *module, double x, double y)
2380 /*[clinic end generated code: output=7559d794343a27b5 input=4f84caa8cfc26a03]*/
2381 {
2382 double r;
2383 /* fmod(x, +/-Inf) returns x for finite x. */
2384 if (Py_IS_INFINITY(y) && Py_IS_FINITE(x))
2385 return PyFloat_FromDouble(x);
2386 errno = 0;
2387 r = fmod(x, y);
2388 #ifdef _MSC_VER
2389 /* Windows (e.g. Windows 10 with MSC v.1916) loose sign
2390 for zero result. But C99+ says: "if y is nonzero, the result
2391 has the same sign as x".
2392 */
2393 if (r == 0.0 && y != 0.0) {
2394 r = copysign(r, x);
2395 }
2396 #endif
2397 if (Py_IS_NAN(r)) {
2398 if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
2399 errno = EDOM;
2400 else
2401 errno = 0;
2402 }
2403 if (errno && is_error(r))
2404 return NULL;
2405 else
2406 return PyFloat_FromDouble(r);
2407 }
2408
2409 /*
2410 Given a *vec* of values, compute the vector norm:
2411
2412 sqrt(sum(x ** 2 for x in vec))
2413
2414 The *max* variable should be equal to the largest fabs(x).
2415 The *n* variable is the length of *vec*.
2416 If n==0, then *max* should be 0.0.
2417 If an infinity is present in the vec, *max* should be INF.
2418 The *found_nan* variable indicates whether some member of
2419 the *vec* is a NaN.
2420
2421 To avoid overflow/underflow and to achieve high accuracy giving results
2422 that are almost always correctly rounded, four techniques are used:
2423
2424 * lossless scaling using a power-of-two scaling factor
2425 * accurate squaring using Veltkamp-Dekker splitting [1]
2426 or an equivalent with an fma() call
2427 * compensated summation using a variant of the Neumaier algorithm [2]
2428 * differential correction of the square root [3]
2429
2430 The usual presentation of the Neumaier summation algorithm has an
2431 expensive branch depending on which operand has the larger
2432 magnitude. We avoid this cost by arranging the calculation so that
2433 fabs(csum) is always as large as fabs(x).
2434
2435 To establish the invariant, *csum* is initialized to 1.0 which is
2436 always larger than x**2 after scaling or after division by *max*.
2437 After the loop is finished, the initial 1.0 is subtracted out for a
2438 net zero effect on the final sum. Since *csum* will be greater than
2439 1.0, the subtraction of 1.0 will not cause fractional digits to be
2440 dropped from *csum*.
2441
2442 To get the full benefit from compensated summation, the largest
2443 addend should be in the range: 0.5 <= |x| <= 1.0. Accordingly,
2444 scaling or division by *max* should not be skipped even if not
2445 otherwise needed to prevent overflow or loss of precision.
2446
2447 The assertion that hi*hi <= 1.0 is a bit subtle. Each vector element
2448 gets scaled to a magnitude below 1.0. The Veltkamp-Dekker splitting
2449 algorithm gives a *hi* value that is correctly rounded to half
2450 precision. When a value at or below 1.0 is correctly rounded, it
2451 never goes above 1.0. And when values at or below 1.0 are squared,
2452 they remain at or below 1.0, thus preserving the summation invariant.
2453
2454 Another interesting assertion is that csum+lo*lo == csum. In the loop,
2455 each scaled vector element has a magnitude less than 1.0. After the
2456 Veltkamp split, *lo* has a maximum value of 2**-27. So the maximum
2457 value of *lo* squared is 2**-54. The value of ulp(1.0)/2.0 is 2**-53.
2458 Given that csum >= 1.0, we have:
2459 lo**2 <= 2**-54 < 2**-53 == 1/2*ulp(1.0) <= ulp(csum)/2
2460 Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
2461
2462 To minimize loss of information during the accumulation of fractional
2463 values, each term has a separate accumulator. This also breaks up
2464 sequential dependencies in the inner loop so the CPU can maximize
2465 floating-point throughput. [4] On an Apple M1 Max, hypot(*vec)
2466 takes only 3.33 µsec when len(vec) == 1000.
2467
2468 The square root differential correction is needed because a
2469 correctly rounded square root of a correctly rounded sum of
2470 squares can still be off by as much as one ulp.
2471
2472 The differential correction starts with a value *x* that is
2473 the difference between the square of *h*, the possibly inaccurately
2474 rounded square root, and the accurately computed sum of squares.
2475 The correction is the first order term of the Maclaurin series
2476 expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
2477
2478 Essentially, this differential correction is equivalent to one
2479 refinement step in Newton's divide-and-average square root
2480 algorithm, effectively doubling the number of accurate bits.
2481 This technique is used in Dekker's SQRT2 algorithm and again in
2482 Borges' ALGORITHM 4 and 5.
2483
2484 The hypot() function is faithfully rounded (less than 1 ulp error)
2485 and usually correctly rounded (within 1/2 ulp). The squaring
2486 step is exact. The Neumaier summation computes as if in doubled
2487 precision (106 bits) and has the advantage that its input squares
2488 are non-negative so that the condition number of the sum is one.
2489 The square root with a differential correction is likewise computed
2490 as if in doubled precision.
2491
2492 For n <= 1000, prior to the final addition that rounds the overall
2493 result, the internal accuracy of "h" together with its correction of
2494 "x / (2.0 * h)" is at least 100 bits. [6] Also, hypot() was tested
2495 against a Decimal implementation with prec=300. After 100 million
2496 trials, no incorrectly rounded examples were found. In addition,
2497 perfect commutativity (all permutations are exactly equal) was
2498 verified for 1 billion random inputs with n=5. [7]
2499
2500 References:
2501
2502 1. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
2503 2. Compensated summation: http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
2504 3. Square root differential correction: https://arxiv.org/pdf/1904.09481.pdf
2505 4. Data dependency graph: https://bugs.python.org/file49439/hypot.png
2506 5. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
2507 6. Analysis of internal accuracy: https://bugs.python.org/file49484/best_frac.py
2508 7. Commutativity test: https://bugs.python.org/file49448/test_hypot_commutativity.py
2509
2510 */
2511
2512 static inline double
vector_norm(Py_ssize_t n,double * vec,double max,int found_nan)2513 vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
2514 {
2515 double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
2516 DoubleLength pr, sm;
2517 int max_e;
2518 Py_ssize_t i;
2519
2520 if (Py_IS_INFINITY(max)) {
2521 return max;
2522 }
2523 if (found_nan) {
2524 return Py_NAN;
2525 }
2526 if (max == 0.0 || n <= 1) {
2527 return max;
2528 }
2529 frexp(max, &max_e);
2530 if (max_e < -1023) {
2531 /* When max_e < -1023, ldexp(1.0, -max_e) would overflow. */
2532 for (i=0 ; i < n ; i++) {
2533 vec[i] /= DBL_MIN; // convert subnormals to normals
2534 }
2535 return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
2536 }
2537 scale = ldexp(1.0, -max_e);
2538 assert(max * scale >= 0.5);
2539 assert(max * scale < 1.0);
2540 for (i=0 ; i < n ; i++) {
2541 x = vec[i];
2542 assert(Py_IS_FINITE(x) && fabs(x) <= max);
2543 x *= scale; // lossless scaling
2544 assert(fabs(x) < 1.0);
2545 pr = dl_mul(x, x); // lossless squaring
2546 assert(pr.hi <= 1.0);
2547 sm = dl_fast_sum(csum, pr.hi); // lossless addition
2548 csum = sm.hi;
2549 frac1 += pr.lo; // lossy addition
2550 frac2 += sm.lo; // lossy addition
2551 }
2552 h = sqrt(csum - 1.0 + (frac1 + frac2));
2553 pr = dl_mul(-h, h);
2554 sm = dl_fast_sum(csum, pr.hi);
2555 csum = sm.hi;
2556 frac1 += pr.lo;
2557 frac2 += sm.lo;
2558 x = csum - 1.0 + (frac1 + frac2);
2559 h += x / (2.0 * h); // differential correction
2560 return h / scale;
2561 }
2562
2563 #define NUM_STACK_ELEMS 16
2564
2565 /*[clinic input]
2566 math.dist
2567
2568 p: object
2569 q: object
2570 /
2571
2572 Return the Euclidean distance between two points p and q.
2573
2574 The points should be specified as sequences (or iterables) of
2575 coordinates. Both inputs must have the same dimension.
2576
2577 Roughly equivalent to:
2578 sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q)))
2579 [clinic start generated code]*/
2580
2581 static PyObject *
math_dist_impl(PyObject * module,PyObject * p,PyObject * q)2582 math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
2583 /*[clinic end generated code: output=56bd9538d06bbcfe input=74e85e1b6092e68e]*/
2584 {
2585 PyObject *item;
2586 double max = 0.0;
2587 double x, px, qx, result;
2588 Py_ssize_t i, m, n;
2589 int found_nan = 0, p_allocated = 0, q_allocated = 0;
2590 double diffs_on_stack[NUM_STACK_ELEMS];
2591 double *diffs = diffs_on_stack;
2592
2593 if (!PyTuple_Check(p)) {
2594 p = PySequence_Tuple(p);
2595 if (p == NULL) {
2596 return NULL;
2597 }
2598 p_allocated = 1;
2599 }
2600 if (!PyTuple_Check(q)) {
2601 q = PySequence_Tuple(q);
2602 if (q == NULL) {
2603 if (p_allocated) {
2604 Py_DECREF(p);
2605 }
2606 return NULL;
2607 }
2608 q_allocated = 1;
2609 }
2610
2611 m = PyTuple_GET_SIZE(p);
2612 n = PyTuple_GET_SIZE(q);
2613 if (m != n) {
2614 PyErr_SetString(PyExc_ValueError,
2615 "both points must have the same number of dimensions");
2616 goto error_exit;
2617 }
2618 if (n > NUM_STACK_ELEMS) {
2619 diffs = (double *) PyMem_Malloc(n * sizeof(double));
2620 if (diffs == NULL) {
2621 PyErr_NoMemory();
2622 goto error_exit;
2623 }
2624 }
2625 for (i=0 ; i<n ; i++) {
2626 item = PyTuple_GET_ITEM(p, i);
2627 ASSIGN_DOUBLE(px, item, error_exit);
2628 item = PyTuple_GET_ITEM(q, i);
2629 ASSIGN_DOUBLE(qx, item, error_exit);
2630 x = fabs(px - qx);
2631 diffs[i] = x;
2632 found_nan |= Py_IS_NAN(x);
2633 if (x > max) {
2634 max = x;
2635 }
2636 }
2637 result = vector_norm(n, diffs, max, found_nan);
2638 if (diffs != diffs_on_stack) {
2639 PyMem_Free(diffs);
2640 }
2641 if (p_allocated) {
2642 Py_DECREF(p);
2643 }
2644 if (q_allocated) {
2645 Py_DECREF(q);
2646 }
2647 return PyFloat_FromDouble(result);
2648
2649 error_exit:
2650 if (diffs != diffs_on_stack) {
2651 PyMem_Free(diffs);
2652 }
2653 if (p_allocated) {
2654 Py_DECREF(p);
2655 }
2656 if (q_allocated) {
2657 Py_DECREF(q);
2658 }
2659 return NULL;
2660 }
2661
2662 /* AC: cannot convert yet, waiting for *args support */
2663 static PyObject *
math_hypot(PyObject * self,PyObject * const * args,Py_ssize_t nargs)2664 math_hypot(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
2665 {
2666 Py_ssize_t i;
2667 PyObject *item;
2668 double max = 0.0;
2669 double x, result;
2670 int found_nan = 0;
2671 double coord_on_stack[NUM_STACK_ELEMS];
2672 double *coordinates = coord_on_stack;
2673
2674 if (nargs > NUM_STACK_ELEMS) {
2675 coordinates = (double *) PyMem_Malloc(nargs * sizeof(double));
2676 if (coordinates == NULL) {
2677 return PyErr_NoMemory();
2678 }
2679 }
2680 for (i = 0; i < nargs; i++) {
2681 item = args[i];
2682 ASSIGN_DOUBLE(x, item, error_exit);
2683 x = fabs(x);
2684 coordinates[i] = x;
2685 found_nan |= Py_IS_NAN(x);
2686 if (x > max) {
2687 max = x;
2688 }
2689 }
2690 result = vector_norm(nargs, coordinates, max, found_nan);
2691 if (coordinates != coord_on_stack) {
2692 PyMem_Free(coordinates);
2693 }
2694 return PyFloat_FromDouble(result);
2695
2696 error_exit:
2697 if (coordinates != coord_on_stack) {
2698 PyMem_Free(coordinates);
2699 }
2700 return NULL;
2701 }
2702
2703 #undef NUM_STACK_ELEMS
2704
2705 PyDoc_STRVAR(math_hypot_doc,
2706 "hypot(*coordinates) -> value\n\n\
2707 Multidimensional Euclidean distance from the origin to a point.\n\
2708 \n\
2709 Roughly equivalent to:\n\
2710 sqrt(sum(x**2 for x in coordinates))\n\
2711 \n\
2712 For a two dimensional point (x, y), gives the hypotenuse\n\
2713 using the Pythagorean theorem: sqrt(x*x + y*y).\n\
2714 \n\
2715 For example, the hypotenuse of a 3/4/5 right triangle is:\n\
2716 \n\
2717 >>> hypot(3.0, 4.0)\n\
2718 5.0\n\
2719 ");
2720
2721 /** sumprod() ***************************************************************/
2722
2723 /* Forward declaration */
2724 static inline int _check_long_mult_overflow(long a, long b);
2725
2726 static inline bool
long_add_would_overflow(long a,long b)2727 long_add_would_overflow(long a, long b)
2728 {
2729 return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a);
2730 }
2731
2732 /*[clinic input]
2733 math.sumprod
2734
2735 p: object
2736 q: object
2737 /
2738
2739 Return the sum of products of values from two iterables p and q.
2740
2741 Roughly equivalent to:
2742
2743 sum(itertools.starmap(operator.mul, zip(p, q, strict=True)))
2744
2745 For float and mixed int/float inputs, the intermediate products
2746 and sums are computed with extended precision.
2747 [clinic start generated code]*/
2748
2749 static PyObject *
math_sumprod_impl(PyObject * module,PyObject * p,PyObject * q)2750 math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
2751 /*[clinic end generated code: output=6722dbfe60664554 input=82be54fe26f87e30]*/
2752 {
2753 PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL;
2754 PyObject *p_it, *q_it, *total;
2755 iternextfunc p_next, q_next;
2756 bool p_stopped = false, q_stopped = false;
2757 bool int_path_enabled = true, int_total_in_use = false;
2758 bool flt_path_enabled = true, flt_total_in_use = false;
2759 long int_total = 0;
2760 TripleLength flt_total = tl_zero;
2761
2762 p_it = PyObject_GetIter(p);
2763 if (p_it == NULL) {
2764 return NULL;
2765 }
2766 q_it = PyObject_GetIter(q);
2767 if (q_it == NULL) {
2768 Py_DECREF(p_it);
2769 return NULL;
2770 }
2771 total = PyLong_FromLong(0);
2772 if (total == NULL) {
2773 Py_DECREF(p_it);
2774 Py_DECREF(q_it);
2775 return NULL;
2776 }
2777 p_next = *Py_TYPE(p_it)->tp_iternext;
2778 q_next = *Py_TYPE(q_it)->tp_iternext;
2779 while (1) {
2780 bool finished;
2781
2782 assert (p_i == NULL);
2783 assert (q_i == NULL);
2784 assert (term_i == NULL);
2785 assert (new_total == NULL);
2786
2787 assert (p_it != NULL);
2788 assert (q_it != NULL);
2789 assert (total != NULL);
2790
2791 p_i = p_next(p_it);
2792 if (p_i == NULL) {
2793 if (PyErr_Occurred()) {
2794 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2795 goto err_exit;
2796 }
2797 PyErr_Clear();
2798 }
2799 p_stopped = true;
2800 }
2801 q_i = q_next(q_it);
2802 if (q_i == NULL) {
2803 if (PyErr_Occurred()) {
2804 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2805 goto err_exit;
2806 }
2807 PyErr_Clear();
2808 }
2809 q_stopped = true;
2810 }
2811 if (p_stopped != q_stopped) {
2812 PyErr_Format(PyExc_ValueError, "Inputs are not the same length");
2813 goto err_exit;
2814 }
2815 finished = p_stopped & q_stopped;
2816
2817 if (int_path_enabled) {
2818
2819 if (!finished && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) {
2820 int overflow;
2821 long int_p, int_q, int_prod;
2822
2823 int_p = PyLong_AsLongAndOverflow(p_i, &overflow);
2824 if (overflow) {
2825 goto finalize_int_path;
2826 }
2827 int_q = PyLong_AsLongAndOverflow(q_i, &overflow);
2828 if (overflow) {
2829 goto finalize_int_path;
2830 }
2831 if (_check_long_mult_overflow(int_p, int_q)) {
2832 goto finalize_int_path;
2833 }
2834 int_prod = int_p * int_q;
2835 if (long_add_would_overflow(int_total, int_prod)) {
2836 goto finalize_int_path;
2837 }
2838 int_total += int_prod;
2839 int_total_in_use = true;
2840 Py_CLEAR(p_i);
2841 Py_CLEAR(q_i);
2842 continue;
2843 }
2844
2845 finalize_int_path:
2846 // We're finished, overflowed, or have a non-int
2847 int_path_enabled = false;
2848 if (int_total_in_use) {
2849 term_i = PyLong_FromLong(int_total);
2850 if (term_i == NULL) {
2851 goto err_exit;
2852 }
2853 new_total = PyNumber_Add(total, term_i);
2854 if (new_total == NULL) {
2855 goto err_exit;
2856 }
2857 Py_SETREF(total, new_total);
2858 new_total = NULL;
2859 Py_CLEAR(term_i);
2860 int_total = 0; // An ounce of prevention, ...
2861 int_total_in_use = false;
2862 }
2863 }
2864
2865 if (flt_path_enabled) {
2866
2867 if (!finished) {
2868 double flt_p, flt_q;
2869 bool p_type_float = PyFloat_CheckExact(p_i);
2870 bool q_type_float = PyFloat_CheckExact(q_i);
2871 if (p_type_float && q_type_float) {
2872 flt_p = PyFloat_AS_DOUBLE(p_i);
2873 flt_q = PyFloat_AS_DOUBLE(q_i);
2874 } else if (p_type_float && (PyLong_CheckExact(q_i) || PyBool_Check(q_i))) {
2875 /* We care about float/int pairs and int/float pairs because
2876 they arise naturally in several use cases such as price
2877 times quantity, measurements with integer weights, or
2878 data selected by a vector of bools. */
2879 flt_p = PyFloat_AS_DOUBLE(p_i);
2880 flt_q = PyLong_AsDouble(q_i);
2881 if (flt_q == -1.0 && PyErr_Occurred()) {
2882 PyErr_Clear();
2883 goto finalize_flt_path;
2884 }
2885 } else if (q_type_float && (PyLong_CheckExact(p_i) || PyBool_Check(p_i))) {
2886 flt_q = PyFloat_AS_DOUBLE(q_i);
2887 flt_p = PyLong_AsDouble(p_i);
2888 if (flt_p == -1.0 && PyErr_Occurred()) {
2889 PyErr_Clear();
2890 goto finalize_flt_path;
2891 }
2892 } else {
2893 goto finalize_flt_path;
2894 }
2895 TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total);
2896 if (isfinite(new_flt_total.hi)) {
2897 flt_total = new_flt_total;
2898 flt_total_in_use = true;
2899 Py_CLEAR(p_i);
2900 Py_CLEAR(q_i);
2901 continue;
2902 }
2903 }
2904
2905 finalize_flt_path:
2906 // We're finished, overflowed, have a non-float, or got a non-finite value
2907 flt_path_enabled = false;
2908 if (flt_total_in_use) {
2909 term_i = PyFloat_FromDouble(tl_to_d(flt_total));
2910 if (term_i == NULL) {
2911 goto err_exit;
2912 }
2913 new_total = PyNumber_Add(total, term_i);
2914 if (new_total == NULL) {
2915 goto err_exit;
2916 }
2917 Py_SETREF(total, new_total);
2918 new_total = NULL;
2919 Py_CLEAR(term_i);
2920 flt_total = tl_zero;
2921 flt_total_in_use = false;
2922 }
2923 }
2924
2925 assert(!int_total_in_use);
2926 assert(!flt_total_in_use);
2927 if (finished) {
2928 goto normal_exit;
2929 }
2930 term_i = PyNumber_Multiply(p_i, q_i);
2931 if (term_i == NULL) {
2932 goto err_exit;
2933 }
2934 new_total = PyNumber_Add(total, term_i);
2935 if (new_total == NULL) {
2936 goto err_exit;
2937 }
2938 Py_SETREF(total, new_total);
2939 new_total = NULL;
2940 Py_CLEAR(p_i);
2941 Py_CLEAR(q_i);
2942 Py_CLEAR(term_i);
2943 }
2944
2945 normal_exit:
2946 Py_DECREF(p_it);
2947 Py_DECREF(q_it);
2948 return total;
2949
2950 err_exit:
2951 Py_DECREF(p_it);
2952 Py_DECREF(q_it);
2953 Py_DECREF(total);
2954 Py_XDECREF(p_i);
2955 Py_XDECREF(q_i);
2956 Py_XDECREF(term_i);
2957 Py_XDECREF(new_total);
2958 return NULL;
2959 }
2960
2961
2962 /* pow can't use math_2, but needs its own wrapper: the problem is
2963 that an infinite result can arise either as a result of overflow
2964 (in which case OverflowError should be raised) or as a result of
2965 e.g. 0.**-5. (for which ValueError needs to be raised.)
2966 */
2967
2968 /*[clinic input]
2969 math.pow
2970
2971 x: double
2972 y: double
2973 /
2974
2975 Return x**y (x to the power of y).
2976 [clinic start generated code]*/
2977
2978 static PyObject *
math_pow_impl(PyObject * module,double x,double y)2979 math_pow_impl(PyObject *module, double x, double y)
2980 /*[clinic end generated code: output=fff93e65abccd6b0 input=c26f1f6075088bfd]*/
2981 {
2982 double r;
2983 int odd_y;
2984
2985 /* deal directly with IEEE specials, to cope with problems on various
2986 platforms whose semantics don't exactly match C99 */
2987 r = 0.; /* silence compiler warning */
2988 if (!Py_IS_FINITE(x) || !Py_IS_FINITE(y)) {
2989 errno = 0;
2990 if (Py_IS_NAN(x))
2991 r = y == 0. ? 1. : x; /* NaN**0 = 1 */
2992 else if (Py_IS_NAN(y))
2993 r = x == 1. ? 1. : y; /* 1**NaN = 1 */
2994 else if (Py_IS_INFINITY(x)) {
2995 odd_y = Py_IS_FINITE(y) && fmod(fabs(y), 2.0) == 1.0;
2996 if (y > 0.)
2997 r = odd_y ? x : fabs(x);
2998 else if (y == 0.)
2999 r = 1.;
3000 else /* y < 0. */
3001 r = odd_y ? copysign(0., x) : 0.;
3002 }
3003 else {
3004 assert(Py_IS_INFINITY(y));
3005 if (fabs(x) == 1.0)
3006 r = 1.;
3007 else if (y > 0. && fabs(x) > 1.0)
3008 r = y;
3009 else if (y < 0. && fabs(x) < 1.0) {
3010 r = -y; /* result is +inf */
3011 }
3012 else
3013 r = 0.;
3014 }
3015 }
3016 else {
3017 /* let libm handle finite**finite */
3018 errno = 0;
3019 r = pow(x, y);
3020 /* a NaN result should arise only from (-ve)**(finite
3021 non-integer); in this case we want to raise ValueError. */
3022 if (!Py_IS_FINITE(r)) {
3023 if (Py_IS_NAN(r)) {
3024 errno = EDOM;
3025 }
3026 /*
3027 an infinite result here arises either from:
3028 (A) (+/-0.)**negative (-> divide-by-zero)
3029 (B) overflow of x**y with x and y finite
3030 */
3031 else if (Py_IS_INFINITY(r)) {
3032 if (x == 0.)
3033 errno = EDOM;
3034 else
3035 errno = ERANGE;
3036 }
3037 }
3038 }
3039
3040 if (errno && is_error(r))
3041 return NULL;
3042 else
3043 return PyFloat_FromDouble(r);
3044 }
3045
3046
3047 static const double degToRad = Py_MATH_PI / 180.0;
3048 static const double radToDeg = 180.0 / Py_MATH_PI;
3049
3050 /*[clinic input]
3051 math.degrees
3052
3053 x: double
3054 /
3055
3056 Convert angle x from radians to degrees.
3057 [clinic start generated code]*/
3058
3059 static PyObject *
math_degrees_impl(PyObject * module,double x)3060 math_degrees_impl(PyObject *module, double x)
3061 /*[clinic end generated code: output=7fea78b294acd12f input=81e016555d6e3660]*/
3062 {
3063 return PyFloat_FromDouble(x * radToDeg);
3064 }
3065
3066
3067 /*[clinic input]
3068 math.radians
3069
3070 x: double
3071 /
3072
3073 Convert angle x from degrees to radians.
3074 [clinic start generated code]*/
3075
3076 static PyObject *
math_radians_impl(PyObject * module,double x)3077 math_radians_impl(PyObject *module, double x)
3078 /*[clinic end generated code: output=34daa47caf9b1590 input=91626fc489fe3d63]*/
3079 {
3080 return PyFloat_FromDouble(x * degToRad);
3081 }
3082
3083
3084 /*[clinic input]
3085 math.isfinite
3086
3087 x: double
3088 /
3089
3090 Return True if x is neither an infinity nor a NaN, and False otherwise.
3091 [clinic start generated code]*/
3092
3093 static PyObject *
math_isfinite_impl(PyObject * module,double x)3094 math_isfinite_impl(PyObject *module, double x)
3095 /*[clinic end generated code: output=8ba1f396440c9901 input=46967d254812e54a]*/
3096 {
3097 return PyBool_FromLong((long)Py_IS_FINITE(x));
3098 }
3099
3100
3101 /*[clinic input]
3102 math.isnan
3103
3104 x: double
3105 /
3106
3107 Return True if x is a NaN (not a number), and False otherwise.
3108 [clinic start generated code]*/
3109
3110 static PyObject *
math_isnan_impl(PyObject * module,double x)3111 math_isnan_impl(PyObject *module, double x)
3112 /*[clinic end generated code: output=f537b4d6df878c3e input=935891e66083f46a]*/
3113 {
3114 return PyBool_FromLong((long)Py_IS_NAN(x));
3115 }
3116
3117
3118 /*[clinic input]
3119 math.isinf
3120
3121 x: double
3122 /
3123
3124 Return True if x is a positive or negative infinity, and False otherwise.
3125 [clinic start generated code]*/
3126
3127 static PyObject *
math_isinf_impl(PyObject * module,double x)3128 math_isinf_impl(PyObject *module, double x)
3129 /*[clinic end generated code: output=9f00cbec4de7b06b input=32630e4212cf961f]*/
3130 {
3131 return PyBool_FromLong((long)Py_IS_INFINITY(x));
3132 }
3133
3134
3135 /*[clinic input]
3136 math.isclose -> bool
3137
3138 a: double
3139 b: double
3140 *
3141 rel_tol: double = 1e-09
3142 maximum difference for being considered "close", relative to the
3143 magnitude of the input values
3144 abs_tol: double = 0.0
3145 maximum difference for being considered "close", regardless of the
3146 magnitude of the input values
3147
3148 Determine whether two floating-point numbers are close in value.
3149
3150 Return True if a is close in value to b, and False otherwise.
3151
3152 For the values to be considered close, the difference between them
3153 must be smaller than at least one of the tolerances.
3154
3155 -inf, inf and NaN behave similarly to the IEEE 754 Standard. That
3156 is, NaN is not close to anything, even itself. inf and -inf are
3157 only close to themselves.
3158 [clinic start generated code]*/
3159
3160 static int
math_isclose_impl(PyObject * module,double a,double b,double rel_tol,double abs_tol)3161 math_isclose_impl(PyObject *module, double a, double b, double rel_tol,
3162 double abs_tol)
3163 /*[clinic end generated code: output=b73070207511952d input=12d41764468bfdb8]*/
3164 {
3165 double diff = 0.0;
3166
3167 /* sanity check on the inputs */
3168 if (rel_tol < 0.0 || abs_tol < 0.0 ) {
3169 PyErr_SetString(PyExc_ValueError,
3170 "tolerances must be non-negative");
3171 return -1;
3172 }
3173
3174 if ( a == b ) {
3175 /* short circuit exact equality -- needed to catch two infinities of
3176 the same sign. And perhaps speeds things up a bit sometimes.
3177 */
3178 return 1;
3179 }
3180
3181 /* This catches the case of two infinities of opposite sign, or
3182 one infinity and one finite number. Two infinities of opposite
3183 sign would otherwise have an infinite relative tolerance.
3184 Two infinities of the same sign are caught by the equality check
3185 above.
3186 */
3187
3188 if (Py_IS_INFINITY(a) || Py_IS_INFINITY(b)) {
3189 return 0;
3190 }
3191
3192 /* now do the regular computation
3193 this is essentially the "weak" test from the Boost library
3194 */
3195
3196 diff = fabs(b - a);
3197
3198 return (((diff <= fabs(rel_tol * b)) ||
3199 (diff <= fabs(rel_tol * a))) ||
3200 (diff <= abs_tol));
3201 }
3202
3203 static inline int
_check_long_mult_overflow(long a,long b)3204 _check_long_mult_overflow(long a, long b) {
3205
3206 /* From Python2's int_mul code:
3207
3208 Integer overflow checking for * is painful: Python tried a couple ways, but
3209 they didn't work on all platforms, or failed in endcases (a product of
3210 -sys.maxint-1 has been a particular pain).
3211
3212 Here's another way:
3213
3214 The native long product x*y is either exactly right or *way* off, being
3215 just the last n bits of the true product, where n is the number of bits
3216 in a long (the delivered product is the true product plus i*2**n for
3217 some integer i).
3218
3219 The native double product (double)x * (double)y is subject to three
3220 rounding errors: on a sizeof(long)==8 box, each cast to double can lose
3221 info, and even on a sizeof(long)==4 box, the multiplication can lose info.
3222 But, unlike the native long product, it's not in *range* trouble: even
3223 if sizeof(long)==32 (256-bit longs), the product easily fits in the
3224 dynamic range of a double. So the leading 50 (or so) bits of the double
3225 product are correct.
3226
3227 We check these two ways against each other, and declare victory if they're
3228 approximately the same. Else, because the native long product is the only
3229 one that can lose catastrophic amounts of information, it's the native long
3230 product that must have overflowed.
3231
3232 */
3233
3234 long longprod = (long)((unsigned long)a * b);
3235 double doubleprod = (double)a * (double)b;
3236 double doubled_longprod = (double)longprod;
3237
3238 if (doubled_longprod == doubleprod) {
3239 return 0;
3240 }
3241
3242 const double diff = doubled_longprod - doubleprod;
3243 const double absdiff = diff >= 0.0 ? diff : -diff;
3244 const double absprod = doubleprod >= 0.0 ? doubleprod : -doubleprod;
3245
3246 if (32.0 * absdiff <= absprod) {
3247 return 0;
3248 }
3249
3250 return 1;
3251 }
3252
3253 /*[clinic input]
3254 math.prod
3255
3256 iterable: object
3257 /
3258 *
3259 start: object(c_default="NULL") = 1
3260
3261 Calculate the product of all the elements in the input iterable.
3262
3263 The default start value for the product is 1.
3264
3265 When the iterable is empty, return the start value. This function is
3266 intended specifically for use with numeric values and may reject
3267 non-numeric types.
3268 [clinic start generated code]*/
3269
3270 static PyObject *
math_prod_impl(PyObject * module,PyObject * iterable,PyObject * start)3271 math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
3272 /*[clinic end generated code: output=36153bedac74a198 input=4c5ab0682782ed54]*/
3273 {
3274 PyObject *result = start;
3275 PyObject *temp, *item, *iter;
3276
3277 iter = PyObject_GetIter(iterable);
3278 if (iter == NULL) {
3279 return NULL;
3280 }
3281
3282 if (result == NULL) {
3283 result = _PyLong_GetOne();
3284 }
3285 Py_INCREF(result);
3286 #ifndef SLOW_PROD
3287 /* Fast paths for integers keeping temporary products in C.
3288 * Assumes all inputs are the same type.
3289 * If the assumption fails, default to use PyObjects instead.
3290 */
3291 if (PyLong_CheckExact(result)) {
3292 int overflow;
3293 long i_result = PyLong_AsLongAndOverflow(result, &overflow);
3294 /* If this already overflowed, don't even enter the loop. */
3295 if (overflow == 0) {
3296 Py_SETREF(result, NULL);
3297 }
3298 /* Loop over all the items in the iterable until we finish, we overflow
3299 * or we found a non integer element */
3300 while (result == NULL) {
3301 item = PyIter_Next(iter);
3302 if (item == NULL) {
3303 Py_DECREF(iter);
3304 if (PyErr_Occurred()) {
3305 return NULL;
3306 }
3307 return PyLong_FromLong(i_result);
3308 }
3309 if (PyLong_CheckExact(item)) {
3310 long b = PyLong_AsLongAndOverflow(item, &overflow);
3311 if (overflow == 0 && !_check_long_mult_overflow(i_result, b)) {
3312 long x = i_result * b;
3313 i_result = x;
3314 Py_DECREF(item);
3315 continue;
3316 }
3317 }
3318 /* Either overflowed or is not an int.
3319 * Restore real objects and process normally */
3320 result = PyLong_FromLong(i_result);
3321 if (result == NULL) {
3322 Py_DECREF(item);
3323 Py_DECREF(iter);
3324 return NULL;
3325 }
3326 temp = PyNumber_Multiply(result, item);
3327 Py_DECREF(result);
3328 Py_DECREF(item);
3329 result = temp;
3330 if (result == NULL) {
3331 Py_DECREF(iter);
3332 return NULL;
3333 }
3334 }
3335 }
3336
3337 /* Fast paths for floats keeping temporary products in C.
3338 * Assumes all inputs are the same type.
3339 * If the assumption fails, default to use PyObjects instead.
3340 */
3341 if (PyFloat_CheckExact(result)) {
3342 double f_result = PyFloat_AS_DOUBLE(result);
3343 Py_SETREF(result, NULL);
3344 while(result == NULL) {
3345 item = PyIter_Next(iter);
3346 if (item == NULL) {
3347 Py_DECREF(iter);
3348 if (PyErr_Occurred()) {
3349 return NULL;
3350 }
3351 return PyFloat_FromDouble(f_result);
3352 }
3353 if (PyFloat_CheckExact(item)) {
3354 f_result *= PyFloat_AS_DOUBLE(item);
3355 Py_DECREF(item);
3356 continue;
3357 }
3358 if (PyLong_CheckExact(item)) {
3359 long value;
3360 int overflow;
3361 value = PyLong_AsLongAndOverflow(item, &overflow);
3362 if (!overflow) {
3363 f_result *= (double)value;
3364 Py_DECREF(item);
3365 continue;
3366 }
3367 }
3368 result = PyFloat_FromDouble(f_result);
3369 if (result == NULL) {
3370 Py_DECREF(item);
3371 Py_DECREF(iter);
3372 return NULL;
3373 }
3374 temp = PyNumber_Multiply(result, item);
3375 Py_DECREF(result);
3376 Py_DECREF(item);
3377 result = temp;
3378 if (result == NULL) {
3379 Py_DECREF(iter);
3380 return NULL;
3381 }
3382 }
3383 }
3384 #endif
3385 /* Consume rest of the iterable (if any) that could not be handled
3386 * by specialized functions above.*/
3387 for(;;) {
3388 item = PyIter_Next(iter);
3389 if (item == NULL) {
3390 /* error, or end-of-sequence */
3391 if (PyErr_Occurred()) {
3392 Py_SETREF(result, NULL);
3393 }
3394 break;
3395 }
3396 temp = PyNumber_Multiply(result, item);
3397 Py_DECREF(result);
3398 Py_DECREF(item);
3399 result = temp;
3400 if (result == NULL)
3401 break;
3402 }
3403 Py_DECREF(iter);
3404 return result;
3405 }
3406
3407
3408 /* least significant 64 bits of the odd part of factorial(n), for n in range(128).
3409
3410 Python code to generate the values:
3411
3412 import math
3413
3414 for n in range(128):
3415 fac = math.factorial(n)
3416 fac_odd_part = fac // (fac & -fac)
3417 reduced_fac_odd_part = fac_odd_part % (2**64)
3418 print(f"{reduced_fac_odd_part:#018x}u")
3419 */
3420 static const uint64_t reduced_factorial_odd_part[] = {
3421 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
3422 0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
3423 0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
3424 0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
3425 0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
3426 0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
3427 0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
3428 0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
3429 0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
3430 0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
3431 0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
3432 0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
3433 0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
3434 0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
3435 0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
3436 0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
3437 0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
3438 0xe1467e88bf829351u, 0xb8001adb9e31b4d5u, 0x2803ac06a0cbb91fu, 0x1904b5d698805799u,
3439 0xe12a648b5c831461u, 0x3516abbd6160cfa9u, 0xac46d25f12fe036du, 0x78bfa1da906b00efu,
3440 0xf6390338b7f111bdu, 0x0f25f80f538255d9u, 0x4ec8ca55b8db140fu, 0x4ff670740b9b30a1u,
3441 0x8fd032443a07f325u, 0x80dfe7965c83eeb5u, 0xa3dc1714d1213afdu, 0x205b7bbfcdc62007u,
3442 0xa78126bbe140a093u, 0x9de1dc61ca7550cfu, 0x84f0046d01b492c5u, 0x2d91810b945de0f3u,
3443 0xf5408b7f6008aa71u, 0x43707f4863034149u, 0xdac65fb9679279d5u, 0xc48406e7d1114eb7u,
3444 0xa7dc9ed3c88e1271u, 0xfb25b2efdb9cb30du, 0x1bebda0951c4df63u, 0x5c85e975580ee5bdu,
3445 0x1591bc60082cb137u, 0x2c38606318ef25d7u, 0x76ca72f7c5c63e27u, 0xf04a75d17baa0915u,
3446 0x77458175139ae30du, 0x0e6c1330bc1b9421u, 0xdf87d2b5797e8293u, 0xefa5c703e1e68925u,
3447 0x2b6b1b3278b4f6e1u, 0xceee27b382394249u, 0xd74e3829f5dab91du, 0xfdb17989c26b5f1fu,
3448 0xc1b7d18781530845u, 0x7b4436b2105a8561u, 0x7ba7c0418372a7d7u, 0x9dbc5c67feb6c639u,
3449 0x502686d7f6ff6b8fu, 0x6101855406be7a1fu, 0x9956afb5806930e7u, 0xe1f0ee88af40f7c5u,
3450 0x984b057bda5c1151u, 0x9a49819acc13ea05u, 0x8ef0dead0896ef27u, 0x71f7826efe292b21u,
3451 0xad80a480e46986efu, 0x01cdc0ebf5e0c6f7u, 0x6e06f839968f68dbu, 0xdd5943ab56e76139u,
3452 0xcdcf31bf8604c5e7u, 0x7e2b4a847054a1cbu, 0x0ca75697a4d3d0f5u, 0x4703f53ac514a98bu,
3453 };
3454
3455 /* inverses of reduced_factorial_odd_part values modulo 2**64.
3456
3457 Python code to generate the values:
3458
3459 import math
3460
3461 for n in range(128):
3462 fac = math.factorial(n)
3463 fac_odd_part = fac // (fac & -fac)
3464 inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
3465 print(f"{inverted_fac_odd_part:#018x}u")
3466 */
3467 static const uint64_t inverted_factorial_odd_part[] = {
3468 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
3469 0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
3470 0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
3471 0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
3472 0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
3473 0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
3474 0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
3475 0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
3476 0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
3477 0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
3478 0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
3479 0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
3480 0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
3481 0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
3482 0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
3483 0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
3484 0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
3485 0x5dfe6667e4da95b1u, 0xfda6eeeef479e47du, 0xf14de991cc7882dfu, 0xe68db79247630ca9u,
3486 0xa7d6db8207ee8fa1u, 0x255e1f0fcf034499u, 0xc9a8990e43dd7e65u, 0x3279b6f289702e0fu,
3487 0xe7b5905d9b71b195u, 0x03025ba41ff0da69u, 0xb7df3d6d3be55aefu, 0xf89b212ebff2b361u,
3488 0xfe856d095996f0adu, 0xd6e533e9fdf20f9du, 0xf8c0e84a63da3255u, 0xa677876cd91b4db7u,
3489 0x07ed4f97780d7d9bu, 0x90a8705f258db62fu, 0xa41bbb2be31b1c0du, 0x6ec28690b038383bu,
3490 0xdb860c3bb2edd691u, 0x0838286838a980f9u, 0x558417a74b36f77du, 0x71779afc3646ef07u,
3491 0x743cda377ccb6e91u, 0x7fdf9f3fe89153c5u, 0xdc97d25df49b9a4bu, 0x76321a778eb37d95u,
3492 0x7cbb5e27da3bd487u, 0x9cff4ade1a009de7u, 0x70eb166d05c15197u, 0xdcf0460b71d5fe3du,
3493 0x5ac1ee5260b6a3c5u, 0xc922dedfdd78efe1u, 0xe5d381dc3b8eeb9bu, 0xd57e5347bafc6aadu,
3494 0x86939040983acd21u, 0x395b9d69740a4ff9u, 0x1467299c8e43d135u, 0x5fe440fcad975cdfu,
3495 0xcaa9a39794a6ca8du, 0xf61dbd640868dea1u, 0xac09d98d74843be7u, 0x2b103b9e1a6b4809u,
3496 0x2ab92d16960f536fu, 0x6653323d5e3681dfu, 0xefd48c1c0624e2d7u, 0xa496fefe04816f0du,
3497 0x1754a7b07bbdd7b1u, 0x23353c829a3852cdu, 0xbf831261abd59097u, 0x57a8e656df0618e1u,
3498 0x16e9206c3100680fu, 0xadad4c6ee921dac7u, 0x635f2b3860265353u, 0xdd6d0059f44b3d09u,
3499 0xac4dd6b894447dd7u, 0x42ea183eeaa87be3u, 0x15612d1550ee5b5du, 0x226fa19d656cb623u,
3500 };
3501
3502 /* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
3503
3504 Python code to generate the values:
3505
3506 import math
3507
3508 for n in range(128):
3509 fac = math.factorial(n)
3510 fac_trailing_zeros = (fac & -fac).bit_length() - 1
3511 print(fac_trailing_zeros)
3512 */
3513
3514 static const uint8_t factorial_trailing_zeros[] = {
3515 0, 0, 1, 1, 3, 3, 4, 4, 7, 7, 8, 8, 10, 10, 11, 11, // 0-15
3516 15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26, // 16-31
3517 31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42, // 32-47
3518 46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57, // 48-63
3519 63, 63, 64, 64, 66, 66, 67, 67, 70, 70, 71, 71, 73, 73, 74, 74, // 64-79
3520 78, 78, 79, 79, 81, 81, 82, 82, 85, 85, 86, 86, 88, 88, 89, 89, // 80-95
3521 94, 94, 95, 95, 97, 97, 98, 98, 101, 101, 102, 102, 104, 104, 105, 105, // 96-111
3522 109, 109, 110, 110, 112, 112, 113, 113, 116, 116, 117, 117, 119, 119, 120, 120, // 112-127
3523 };
3524
3525 /* Number of permutations and combinations.
3526 * P(n, k) = n! / (n-k)!
3527 * C(n, k) = P(n, k) / k!
3528 */
3529
3530 /* Calculate C(n, k) for n in the 63-bit range. */
3531 static PyObject *
perm_comb_small(unsigned long long n,unsigned long long k,int iscomb)3532 perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
3533 {
3534 assert(k != 0);
3535
3536 /* For small enough n and k the result fits in the 64-bit range and can
3537 * be calculated without allocating intermediate PyLong objects. */
3538 if (iscomb) {
3539 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
3540 * fits into a uint64_t. Exclude k = 1, because the second fast
3541 * path is faster for this case.*/
3542 static const unsigned char fast_comb_limits1[] = {
3543 0, 0, 127, 127, 127, 127, 127, 127, // 0-7
3544 127, 127, 127, 127, 127, 127, 127, 127, // 8-15
3545 116, 105, 97, 91, 86, 82, 78, 76, // 16-23
3546 74, 72, 71, 70, 69, 68, 68, 67, // 24-31
3547 67, 67, 67, // 32-34
3548 };
3549 if (k < Py_ARRAY_LENGTH(fast_comb_limits1) && n <= fast_comb_limits1[k]) {
3550 /*
3551 comb(n, k) fits into a uint64_t. We compute it as
3552
3553 comb_odd_part << shift
3554
3555 where 2**shift is the largest power of two dividing comb(n, k)
3556 and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
3557 calculated efficiently via arithmetic modulo 2**64, using three
3558 lookups and two uint64_t multiplications.
3559 */
3560 uint64_t comb_odd_part = reduced_factorial_odd_part[n]
3561 * inverted_factorial_odd_part[k]
3562 * inverted_factorial_odd_part[n - k];
3563 int shift = factorial_trailing_zeros[n]
3564 - factorial_trailing_zeros[k]
3565 - factorial_trailing_zeros[n - k];
3566 return PyLong_FromUnsignedLongLong(comb_odd_part << shift);
3567 }
3568
3569 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
3570 * fits into a long long (which is at least 64 bit). Only contains
3571 * items larger than in fast_comb_limits1. */
3572 static const unsigned long long fast_comb_limits2[] = {
3573 0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449, // 0-7
3574 746, 453, 308, 227, 178, 147, // 8-13
3575 };
3576 if (k < Py_ARRAY_LENGTH(fast_comb_limits2) && n <= fast_comb_limits2[k]) {
3577 /* C(n, k) = C(n, k-1) * (n-k+1) / k */
3578 unsigned long long result = n;
3579 for (unsigned long long i = 1; i < k;) {
3580 result *= --n;
3581 result /= ++i;
3582 }
3583 return PyLong_FromUnsignedLongLong(result);
3584 }
3585 }
3586 else {
3587 /* Maps k to the maximal n so that k <= n and P(n, k)
3588 * fits into a long long (which is at least 64 bit). */
3589 static const unsigned long long fast_perm_limits[] = {
3590 0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568, // 0-7
3591 259, 142, 88, 61, 45, 36, 30, 26, // 8-15
3592 24, 22, 21, 20, 20, // 16-20
3593 };
3594 if (k < Py_ARRAY_LENGTH(fast_perm_limits) && n <= fast_perm_limits[k]) {
3595 if (n <= 127) {
3596 /* P(n, k) fits into a uint64_t. */
3597 uint64_t perm_odd_part = reduced_factorial_odd_part[n]
3598 * inverted_factorial_odd_part[n - k];
3599 int shift = factorial_trailing_zeros[n]
3600 - factorial_trailing_zeros[n - k];
3601 return PyLong_FromUnsignedLongLong(perm_odd_part << shift);
3602 }
3603
3604 /* P(n, k) = P(n, k-1) * (n-k+1) */
3605 unsigned long long result = n;
3606 for (unsigned long long i = 1; i < k;) {
3607 result *= --n;
3608 ++i;
3609 }
3610 return PyLong_FromUnsignedLongLong(result);
3611 }
3612 }
3613
3614 /* For larger n use recursive formulas:
3615 *
3616 * P(n, k) = P(n, j) * P(n-j, k-j)
3617 * C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
3618 */
3619 unsigned long long j = k / 2;
3620 PyObject *a, *b;
3621 a = perm_comb_small(n, j, iscomb);
3622 if (a == NULL) {
3623 return NULL;
3624 }
3625 b = perm_comb_small(n - j, k - j, iscomb);
3626 if (b == NULL) {
3627 goto error;
3628 }
3629 Py_SETREF(a, PyNumber_Multiply(a, b));
3630 Py_DECREF(b);
3631 if (iscomb && a != NULL) {
3632 b = perm_comb_small(k, j, 1);
3633 if (b == NULL) {
3634 goto error;
3635 }
3636 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3637 Py_DECREF(b);
3638 }
3639 return a;
3640
3641 error:
3642 Py_DECREF(a);
3643 return NULL;
3644 }
3645
3646 /* Calculate P(n, k) or C(n, k) using recursive formulas.
3647 * It is more efficient than sequential multiplication thanks to
3648 * Karatsuba multiplication.
3649 */
3650 static PyObject *
perm_comb(PyObject * n,unsigned long long k,int iscomb)3651 perm_comb(PyObject *n, unsigned long long k, int iscomb)
3652 {
3653 if (k == 0) {
3654 return PyLong_FromLong(1);
3655 }
3656 if (k == 1) {
3657 return Py_NewRef(n);
3658 }
3659
3660 /* P(n, k) = P(n, j) * P(n-j, k-j) */
3661 /* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
3662 unsigned long long j = k / 2;
3663 PyObject *a, *b;
3664 a = perm_comb(n, j, iscomb);
3665 if (a == NULL) {
3666 return NULL;
3667 }
3668 PyObject *t = PyLong_FromUnsignedLongLong(j);
3669 if (t == NULL) {
3670 goto error;
3671 }
3672 n = PyNumber_Subtract(n, t);
3673 Py_DECREF(t);
3674 if (n == NULL) {
3675 goto error;
3676 }
3677 b = perm_comb(n, k - j, iscomb);
3678 Py_DECREF(n);
3679 if (b == NULL) {
3680 goto error;
3681 }
3682 Py_SETREF(a, PyNumber_Multiply(a, b));
3683 Py_DECREF(b);
3684 if (iscomb && a != NULL) {
3685 b = perm_comb_small(k, j, 1);
3686 if (b == NULL) {
3687 goto error;
3688 }
3689 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3690 Py_DECREF(b);
3691 }
3692 return a;
3693
3694 error:
3695 Py_DECREF(a);
3696 return NULL;
3697 }
3698
3699 /*[clinic input]
3700 math.perm
3701
3702 n: object
3703 k: object = None
3704 /
3705
3706 Number of ways to choose k items from n items without repetition and with order.
3707
3708 Evaluates to n! / (n - k)! when k <= n and evaluates
3709 to zero when k > n.
3710
3711 If k is not specified or is None, then k defaults to n
3712 and the function returns n!.
3713
3714 Raises TypeError if either of the arguments are not integers.
3715 Raises ValueError if either of the arguments are negative.
3716 [clinic start generated code]*/
3717
3718 static PyObject *
math_perm_impl(PyObject * module,PyObject * n,PyObject * k)3719 math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
3720 /*[clinic end generated code: output=e021a25469653e23 input=5311c5a00f359b53]*/
3721 {
3722 PyObject *result = NULL;
3723 int overflow, cmp;
3724 long long ki, ni;
3725
3726 if (k == Py_None) {
3727 return math_factorial(module, n);
3728 }
3729 n = PyNumber_Index(n);
3730 if (n == NULL) {
3731 return NULL;
3732 }
3733 k = PyNumber_Index(k);
3734 if (k == NULL) {
3735 Py_DECREF(n);
3736 return NULL;
3737 }
3738 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3739
3740 if (_PyLong_IsNegative((PyLongObject *)n)) {
3741 PyErr_SetString(PyExc_ValueError,
3742 "n must be a non-negative integer");
3743 goto error;
3744 }
3745 if (_PyLong_IsNegative((PyLongObject *)k)) {
3746 PyErr_SetString(PyExc_ValueError,
3747 "k must be a non-negative integer");
3748 goto error;
3749 }
3750
3751 cmp = PyObject_RichCompareBool(n, k, Py_LT);
3752 if (cmp != 0) {
3753 if (cmp > 0) {
3754 result = PyLong_FromLong(0);
3755 goto done;
3756 }
3757 goto error;
3758 }
3759
3760 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3761 assert(overflow >= 0 && !PyErr_Occurred());
3762 if (overflow > 0) {
3763 PyErr_Format(PyExc_OverflowError,
3764 "k must not exceed %lld",
3765 LLONG_MAX);
3766 goto error;
3767 }
3768 assert(ki >= 0);
3769
3770 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3771 assert(overflow >= 0 && !PyErr_Occurred());
3772 if (!overflow && ki > 1) {
3773 assert(ni >= 0);
3774 result = perm_comb_small((unsigned long long)ni,
3775 (unsigned long long)ki, 0);
3776 }
3777 else {
3778 result = perm_comb(n, (unsigned long long)ki, 0);
3779 }
3780
3781 done:
3782 Py_DECREF(n);
3783 Py_DECREF(k);
3784 return result;
3785
3786 error:
3787 Py_DECREF(n);
3788 Py_DECREF(k);
3789 return NULL;
3790 }
3791
3792 /*[clinic input]
3793 math.comb
3794
3795 n: object
3796 k: object
3797 /
3798
3799 Number of ways to choose k items from n items without repetition and without order.
3800
3801 Evaluates to n! / (k! * (n - k)!) when k <= n and evaluates
3802 to zero when k > n.
3803
3804 Also called the binomial coefficient because it is equivalent
3805 to the coefficient of k-th term in polynomial expansion of the
3806 expression (1 + x)**n.
3807
3808 Raises TypeError if either of the arguments are not integers.
3809 Raises ValueError if either of the arguments are negative.
3810
3811 [clinic start generated code]*/
3812
3813 static PyObject *
math_comb_impl(PyObject * module,PyObject * n,PyObject * k)3814 math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
3815 /*[clinic end generated code: output=bd2cec8d854f3493 input=9a05315af2518709]*/
3816 {
3817 PyObject *result = NULL, *temp;
3818 int overflow, cmp;
3819 long long ki, ni;
3820
3821 n = PyNumber_Index(n);
3822 if (n == NULL) {
3823 return NULL;
3824 }
3825 k = PyNumber_Index(k);
3826 if (k == NULL) {
3827 Py_DECREF(n);
3828 return NULL;
3829 }
3830 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3831
3832 if (_PyLong_IsNegative((PyLongObject *)n)) {
3833 PyErr_SetString(PyExc_ValueError,
3834 "n must be a non-negative integer");
3835 goto error;
3836 }
3837 if (_PyLong_IsNegative((PyLongObject *)k)) {
3838 PyErr_SetString(PyExc_ValueError,
3839 "k must be a non-negative integer");
3840 goto error;
3841 }
3842
3843 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3844 assert(overflow >= 0 && !PyErr_Occurred());
3845 if (!overflow) {
3846 assert(ni >= 0);
3847 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3848 assert(overflow >= 0 && !PyErr_Occurred());
3849 if (overflow || ki > ni) {
3850 result = PyLong_FromLong(0);
3851 goto done;
3852 }
3853 assert(ki >= 0);
3854
3855 ki = Py_MIN(ki, ni - ki);
3856 if (ki > 1) {
3857 result = perm_comb_small((unsigned long long)ni,
3858 (unsigned long long)ki, 1);
3859 goto done;
3860 }
3861 /* For k == 1 just return the original n in perm_comb(). */
3862 }
3863 else {
3864 /* k = min(k, n - k) */
3865 temp = PyNumber_Subtract(n, k);
3866 if (temp == NULL) {
3867 goto error;
3868 }
3869 assert(PyLong_Check(temp));
3870 if (_PyLong_IsNegative((PyLongObject *)temp)) {
3871 Py_DECREF(temp);
3872 result = PyLong_FromLong(0);
3873 goto done;
3874 }
3875 cmp = PyObject_RichCompareBool(temp, k, Py_LT);
3876 if (cmp > 0) {
3877 Py_SETREF(k, temp);
3878 }
3879 else {
3880 Py_DECREF(temp);
3881 if (cmp < 0) {
3882 goto error;
3883 }
3884 }
3885
3886 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3887 assert(overflow >= 0 && !PyErr_Occurred());
3888 if (overflow) {
3889 PyErr_Format(PyExc_OverflowError,
3890 "min(n - k, k) must not exceed %lld",
3891 LLONG_MAX);
3892 goto error;
3893 }
3894 assert(ki >= 0);
3895 }
3896
3897 result = perm_comb(n, (unsigned long long)ki, 1);
3898
3899 done:
3900 Py_DECREF(n);
3901 Py_DECREF(k);
3902 return result;
3903
3904 error:
3905 Py_DECREF(n);
3906 Py_DECREF(k);
3907 return NULL;
3908 }
3909
3910
3911 /*[clinic input]
3912 math.nextafter
3913
3914 x: double
3915 y: double
3916 /
3917 *
3918 steps: object = None
3919
3920 Return the floating-point value the given number of steps after x towards y.
3921
3922 If steps is not specified or is None, it defaults to 1.
3923
3924 Raises a TypeError, if x or y is not a double, or if steps is not an integer.
3925 Raises ValueError if steps is negative.
3926 [clinic start generated code]*/
3927
3928 static PyObject *
math_nextafter_impl(PyObject * module,double x,double y,PyObject * steps)3929 math_nextafter_impl(PyObject *module, double x, double y, PyObject *steps)
3930 /*[clinic end generated code: output=cc6511f02afc099e input=7f2a5842112af2b4]*/
3931 {
3932 #if defined(_AIX)
3933 if (x == y) {
3934 /* On AIX 7.1, libm nextafter(-0.0, +0.0) returns -0.0.
3935 Bug fixed in bos.adt.libm 7.2.2.0 by APAR IV95512. */
3936 return PyFloat_FromDouble(y);
3937 }
3938 if (Py_IS_NAN(x)) {
3939 return PyFloat_FromDouble(x);
3940 }
3941 if (Py_IS_NAN(y)) {
3942 return PyFloat_FromDouble(y);
3943 }
3944 #endif
3945 if (steps == Py_None) {
3946 // fast path: we default to one step.
3947 return PyFloat_FromDouble(nextafter(x, y));
3948 }
3949 steps = PyNumber_Index(steps);
3950 if (steps == NULL) {
3951 return NULL;
3952 }
3953 assert(PyLong_CheckExact(steps));
3954 if (_PyLong_IsNegative((PyLongObject *)steps)) {
3955 PyErr_SetString(PyExc_ValueError,
3956 "steps must be a non-negative integer");
3957 Py_DECREF(steps);
3958 return NULL;
3959 }
3960
3961 unsigned long long usteps_ull = PyLong_AsUnsignedLongLong(steps);
3962 // Conveniently, uint64_t and double have the same number of bits
3963 // on all the platforms we care about.
3964 // So if an overflow occurs, we can just use UINT64_MAX.
3965 Py_DECREF(steps);
3966 if (usteps_ull >= UINT64_MAX) {
3967 // This branch includes the case where an error occurred, since
3968 // (unsigned long long)(-1) = ULLONG_MAX >= UINT64_MAX. Note that
3969 // usteps_ull can be strictly larger than UINT64_MAX on a machine
3970 // where unsigned long long has width > 64 bits.
3971 if (PyErr_Occurred()) {
3972 if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
3973 PyErr_Clear();
3974 }
3975 else {
3976 return NULL;
3977 }
3978 }
3979 usteps_ull = UINT64_MAX;
3980 }
3981 assert(usteps_ull <= UINT64_MAX);
3982 uint64_t usteps = (uint64_t)usteps_ull;
3983
3984 if (usteps == 0) {
3985 return PyFloat_FromDouble(x);
3986 }
3987 if (Py_IS_NAN(x)) {
3988 return PyFloat_FromDouble(x);
3989 }
3990 if (Py_IS_NAN(y)) {
3991 return PyFloat_FromDouble(y);
3992 }
3993
3994 // We assume that double and uint64_t have the same endianness.
3995 // This is not guaranteed by the C-standard, but it is true for
3996 // all platforms we care about. (The most likely form of violation
3997 // would be a "mixed-endian" double.)
3998 union pun {double f; uint64_t i;};
3999 union pun ux = {x}, uy = {y};
4000 if (ux.i == uy.i) {
4001 return PyFloat_FromDouble(x);
4002 }
4003
4004 const uint64_t sign_bit = 1ULL<<63;
4005
4006 uint64_t ax = ux.i & ~sign_bit;
4007 uint64_t ay = uy.i & ~sign_bit;
4008
4009 // opposite signs
4010 if (((ux.i ^ uy.i) & sign_bit)) {
4011 // NOTE: ax + ay can never overflow, because their most significant bit
4012 // ain't set.
4013 if (ax + ay <= usteps) {
4014 return PyFloat_FromDouble(uy.f);
4015 // This comparison has to use <, because <= would get +0.0 vs -0.0
4016 // wrong.
4017 } else if (ax < usteps) {
4018 union pun result = {.i = (uy.i & sign_bit) | (usteps - ax)};
4019 return PyFloat_FromDouble(result.f);
4020 } else {
4021 ux.i -= usteps;
4022 return PyFloat_FromDouble(ux.f);
4023 }
4024 // same sign
4025 } else if (ax > ay) {
4026 if (ax - ay >= usteps) {
4027 ux.i -= usteps;
4028 return PyFloat_FromDouble(ux.f);
4029 } else {
4030 return PyFloat_FromDouble(uy.f);
4031 }
4032 } else {
4033 if (ay - ax >= usteps) {
4034 ux.i += usteps;
4035 return PyFloat_FromDouble(ux.f);
4036 } else {
4037 return PyFloat_FromDouble(uy.f);
4038 }
4039 }
4040 }
4041
4042
4043 /*[clinic input]
4044 math.ulp -> double
4045
4046 x: double
4047 /
4048
4049 Return the value of the least significant bit of the float x.
4050 [clinic start generated code]*/
4051
4052 static double
math_ulp_impl(PyObject * module,double x)4053 math_ulp_impl(PyObject *module, double x)
4054 /*[clinic end generated code: output=f5207867a9384dd4 input=31f9bfbbe373fcaa]*/
4055 {
4056 if (Py_IS_NAN(x)) {
4057 return x;
4058 }
4059 x = fabs(x);
4060 if (Py_IS_INFINITY(x)) {
4061 return x;
4062 }
4063 double inf = Py_INFINITY;
4064 double x2 = nextafter(x, inf);
4065 if (Py_IS_INFINITY(x2)) {
4066 /* special case: x is the largest positive representable float */
4067 x2 = nextafter(x, -inf);
4068 return x - x2;
4069 }
4070 return x2 - x;
4071 }
4072
4073 static int
math_exec(PyObject * module)4074 math_exec(PyObject *module)
4075 {
4076
4077 math_module_state *state = get_math_module_state(module);
4078 state->str___ceil__ = PyUnicode_InternFromString("__ceil__");
4079 if (state->str___ceil__ == NULL) {
4080 return -1;
4081 }
4082 state->str___floor__ = PyUnicode_InternFromString("__floor__");
4083 if (state->str___floor__ == NULL) {
4084 return -1;
4085 }
4086 state->str___trunc__ = PyUnicode_InternFromString("__trunc__");
4087 if (state->str___trunc__ == NULL) {
4088 return -1;
4089 }
4090 if (PyModule_Add(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
4091 return -1;
4092 }
4093 if (PyModule_Add(module, "e", PyFloat_FromDouble(Py_MATH_E)) < 0) {
4094 return -1;
4095 }
4096 // 2pi
4097 if (PyModule_Add(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
4098 return -1;
4099 }
4100 if (PyModule_Add(module, "inf", PyFloat_FromDouble(Py_INFINITY)) < 0) {
4101 return -1;
4102 }
4103 if (PyModule_Add(module, "nan", PyFloat_FromDouble(fabs(Py_NAN))) < 0) {
4104 return -1;
4105 }
4106 return 0;
4107 }
4108
4109 static int
math_clear(PyObject * module)4110 math_clear(PyObject *module)
4111 {
4112 math_module_state *state = get_math_module_state(module);
4113 Py_CLEAR(state->str___ceil__);
4114 Py_CLEAR(state->str___floor__);
4115 Py_CLEAR(state->str___trunc__);
4116 return 0;
4117 }
4118
4119 static void
math_free(void * module)4120 math_free(void *module)
4121 {
4122 math_clear((PyObject *)module);
4123 }
4124
4125 static PyMethodDef math_methods[] = {
4126 {"acos", math_acos, METH_O, math_acos_doc},
4127 {"acosh", math_acosh, METH_O, math_acosh_doc},
4128 {"asin", math_asin, METH_O, math_asin_doc},
4129 {"asinh", math_asinh, METH_O, math_asinh_doc},
4130 {"atan", math_atan, METH_O, math_atan_doc},
4131 {"atan2", _PyCFunction_CAST(math_atan2), METH_FASTCALL, math_atan2_doc},
4132 {"atanh", math_atanh, METH_O, math_atanh_doc},
4133 {"cbrt", math_cbrt, METH_O, math_cbrt_doc},
4134 MATH_CEIL_METHODDEF
4135 {"copysign", _PyCFunction_CAST(math_copysign), METH_FASTCALL, math_copysign_doc},
4136 {"cos", math_cos, METH_O, math_cos_doc},
4137 {"cosh", math_cosh, METH_O, math_cosh_doc},
4138 MATH_DEGREES_METHODDEF
4139 MATH_DIST_METHODDEF
4140 {"erf", math_erf, METH_O, math_erf_doc},
4141 {"erfc", math_erfc, METH_O, math_erfc_doc},
4142 {"exp", math_exp, METH_O, math_exp_doc},
4143 {"exp2", math_exp2, METH_O, math_exp2_doc},
4144 {"expm1", math_expm1, METH_O, math_expm1_doc},
4145 {"fabs", math_fabs, METH_O, math_fabs_doc},
4146 MATH_FACTORIAL_METHODDEF
4147 MATH_FLOOR_METHODDEF
4148 MATH_FMA_METHODDEF
4149 MATH_FMOD_METHODDEF
4150 MATH_FREXP_METHODDEF
4151 MATH_FSUM_METHODDEF
4152 {"gamma", math_gamma, METH_O, math_gamma_doc},
4153 {"gcd", _PyCFunction_CAST(math_gcd), METH_FASTCALL, math_gcd_doc},
4154 {"hypot", _PyCFunction_CAST(math_hypot), METH_FASTCALL, math_hypot_doc},
4155 MATH_ISCLOSE_METHODDEF
4156 MATH_ISFINITE_METHODDEF
4157 MATH_ISINF_METHODDEF
4158 MATH_ISNAN_METHODDEF
4159 MATH_ISQRT_METHODDEF
4160 {"lcm", _PyCFunction_CAST(math_lcm), METH_FASTCALL, math_lcm_doc},
4161 MATH_LDEXP_METHODDEF
4162 {"lgamma", math_lgamma, METH_O, math_lgamma_doc},
4163 {"log", _PyCFunction_CAST(math_log), METH_FASTCALL, math_log_doc},
4164 {"log1p", math_log1p, METH_O, math_log1p_doc},
4165 MATH_LOG10_METHODDEF
4166 MATH_LOG2_METHODDEF
4167 MATH_MODF_METHODDEF
4168 MATH_POW_METHODDEF
4169 MATH_RADIANS_METHODDEF
4170 {"remainder", _PyCFunction_CAST(math_remainder), METH_FASTCALL, math_remainder_doc},
4171 {"sin", math_sin, METH_O, math_sin_doc},
4172 {"sinh", math_sinh, METH_O, math_sinh_doc},
4173 {"sqrt", math_sqrt, METH_O, math_sqrt_doc},
4174 {"tan", math_tan, METH_O, math_tan_doc},
4175 {"tanh", math_tanh, METH_O, math_tanh_doc},
4176 MATH_SUMPROD_METHODDEF
4177 MATH_TRUNC_METHODDEF
4178 MATH_PROD_METHODDEF
4179 MATH_PERM_METHODDEF
4180 MATH_COMB_METHODDEF
4181 MATH_NEXTAFTER_METHODDEF
4182 MATH_ULP_METHODDEF
4183 {NULL, NULL} /* sentinel */
4184 };
4185
4186 static PyModuleDef_Slot math_slots[] = {
4187 {Py_mod_exec, math_exec},
4188 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
4189 {Py_mod_gil, Py_MOD_GIL_NOT_USED},
4190 {0, NULL}
4191 };
4192
4193 PyDoc_STRVAR(module_doc,
4194 "This module provides access to the mathematical functions\n"
4195 "defined by the C standard.");
4196
4197 static struct PyModuleDef mathmodule = {
4198 PyModuleDef_HEAD_INIT,
4199 .m_name = "math",
4200 .m_doc = module_doc,
4201 .m_size = sizeof(math_module_state),
4202 .m_methods = math_methods,
4203 .m_slots = math_slots,
4204 .m_clear = math_clear,
4205 .m_free = math_free,
4206 };
4207
4208 PyMODINIT_FUNC
PyInit_math(void)4209 PyInit_math(void)
4210 {
4211 return PyModuleDef_Init(&mathmodule);
4212 }
4213