1/* Implementation of gamma function according to ISO C.
2 Copyright (C) 1997-2021 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#include <math.h>
21#include <math-narrow-eval.h>
22#include <math_private.h>
23#include <fenv_private.h>
24#include <math-underflow.h>
25#include <float.h>
26#include <libm-alias-finite.h>
27#include <mul_split.h>
28
29/* Coefficients B_2k / 2k(2k-1) of x^-(2k-1) inside exp in Stirling's
30 approximation to gamma function. */
31
32static const double gamma_coeff[] =
33 {
34 0x1.5555555555555p-4,
35 -0xb.60b60b60b60b8p-12,
36 0x3.4034034034034p-12,
37 -0x2.7027027027028p-12,
38 0x3.72a3c5631fe46p-12,
39 -0x7.daac36664f1f4p-12,
40 };
41
42#define NCOEFF (sizeof (gamma_coeff) / sizeof (gamma_coeff[0]))
43
44/* Return gamma (X), for positive X less than 184, in the form R *
45 2^(*EXP2_ADJ), where R is the return value and *EXP2_ADJ is set to
46 avoid overflow or underflow in intermediate calculations. */
47
48static double
49gamma_positive (double x, int *exp2_adj)
50{
51 int local_signgam;
52 if (x < 0.5)
53 {
54 *exp2_adj = 0;
55 return __ieee754_exp (__ieee754_lgamma_r (x + 1, &local_signgam)) / x;
56 }
57 else if (x <= 1.5)
58 {
59 *exp2_adj = 0;
60 return __ieee754_exp (__ieee754_lgamma_r (x, &local_signgam));
61 }
62 else if (x < 6.5)
63 {
64 /* Adjust into the range for using exp (lgamma). */
65 *exp2_adj = 0;
66 double n = ceil (x - 1.5);
67 double x_adj = x - n;
68 double eps;
69 double prod = __gamma_product (x_adj, 0, n, &eps);
70 return (__ieee754_exp (__ieee754_lgamma_r (x_adj, &local_signgam))
71 * prod * (1.0 + eps));
72 }
73 else
74 {
75 double eps = 0;
76 double x_eps = 0;
77 double x_adj = x;
78 double prod = 1;
79 if (x < 12.0)
80 {
81 /* Adjust into the range for applying Stirling's
82 approximation. */
83 double n = ceil (12.0 - x);
84 x_adj = math_narrow_eval (x + n);
85 x_eps = (x - (x_adj - n));
86 prod = __gamma_product (x_adj - n, x_eps, n, &eps);
87 }
88 /* The result is now gamma (X_ADJ + X_EPS) / (PROD * (1 + EPS)).
89 Compute gamma (X_ADJ + X_EPS) using Stirling's approximation,
90 starting by computing pow (X_ADJ, X_ADJ) with a power of 2
91 factored out. */
92 double x_adj_int = round (x_adj);
93 double x_adj_frac = x_adj - x_adj_int;
94 int x_adj_log2;
95 double x_adj_mant = __frexp (x_adj, &x_adj_log2);
96 if (x_adj_mant < M_SQRT1_2)
97 {
98 x_adj_log2--;
99 x_adj_mant *= 2.0;
100 }
101 *exp2_adj = x_adj_log2 * (int) x_adj_int;
102 double h1, l1, h2, l2;
103 mul_split (&h1, &l1, __ieee754_pow (x_adj_mant, x_adj),
104 __ieee754_exp2 (x_adj_log2 * x_adj_frac));
105 mul_split (&h2, &l2, __ieee754_exp (-x_adj), sqrt (2 * M_PI / x_adj));
106 mul_expansion (&h1, &l1, h1, l1, h2, l2);
107 /* Divide by prod * (1 + eps). */
108 div_expansion (&h1, &l1, h1, l1, prod, prod * eps);
109 double exp_adj = x_eps * __ieee754_log (x_adj);
110 double bsum = gamma_coeff[NCOEFF - 1];
111 double x_adj2 = x_adj * x_adj;
112 for (size_t i = 1; i <= NCOEFF - 1; i++)
113 bsum = bsum / x_adj2 + gamma_coeff[NCOEFF - 1 - i];
114 exp_adj += bsum / x_adj;
115 /* Now return (h1+l1) * exp(exp_adj), where exp_adj is small. */
116 l1 += h1 * __expm1 (exp_adj);
117 return h1 + l1;
118 }
119}
120
121double
122__ieee754_gamma_r (double x, int *signgamp)
123{
124 int32_t hx;
125 uint32_t lx;
126 double ret;
127
128 EXTRACT_WORDS (hx, lx, x);
129
130 if (__glibc_unlikely (((hx & 0x7fffffff) | lx) == 0))
131 {
132 /* Return value for x == 0 is Inf with divide by zero exception. */
133 *signgamp = 0;
134 return 1.0 / x;
135 }
136 if (__builtin_expect (hx < 0, 0)
137 && (uint32_t) hx < 0xfff00000 && rint (x) == x)
138 {
139 /* Return value for integer x < 0 is NaN with invalid exception. */
140 *signgamp = 0;
141 return (x - x) / (x - x);
142 }
143 if (__glibc_unlikely ((unsigned int) hx == 0xfff00000 && lx == 0))
144 {
145 /* x == -Inf. According to ISO this is NaN. */
146 *signgamp = 0;
147 return x - x;
148 }
149 if (__glibc_unlikely ((hx & 0x7ff00000) == 0x7ff00000))
150 {
151 /* Positive infinity (return positive infinity) or NaN (return
152 NaN). */
153 *signgamp = 0;
154 return x + x;
155 }
156
157 if (x >= 172.0)
158 {
159 /* Overflow. */
160 *signgamp = 0;
161 ret = math_narrow_eval (DBL_MAX * DBL_MAX);
162 return ret;
163 }
164 else
165 {
166 SET_RESTORE_ROUND (FE_TONEAREST);
167 if (x > 0.0)
168 {
169 *signgamp = 0;
170 int exp2_adj;
171 double tret = gamma_positive (x, &exp2_adj);
172 ret = __scalbn (tret, exp2_adj);
173 }
174 else if (x >= -DBL_EPSILON / 4.0)
175 {
176 *signgamp = 0;
177 ret = 1.0 / x;
178 }
179 else
180 {
181 double tx = trunc (x);
182 *signgamp = (tx == 2.0 * trunc (tx / 2.0)) ? -1 : 1;
183 if (x <= -184.0)
184 /* Underflow. */
185 ret = DBL_MIN * DBL_MIN;
186 else
187 {
188 double frac = tx - x;
189 if (frac > 0.5)
190 frac = 1.0 - frac;
191 double sinpix = (frac <= 0.25
192 ? __sin (M_PI * frac)
193 : __cos (M_PI * (0.5 - frac)));
194 int exp2_adj;
195 double h1, l1, h2, l2;
196 h2 = gamma_positive (-x, &exp2_adj);
197 mul_split (&h1, &l1, sinpix, h2);
198 /* sinpix*gamma_positive(.) = h1 + l1 */
199 mul_split (&h2, &l2, h1, x);
200 /* h1*x = h2 + l2 */
201 /* (h1 + l1) * x = h1*x + l1*x = h2 + l2 + l1*x */
202 l2 += l1 * x;
203 /* x*sinpix*gamma_positive(.) ~ h2 + l2 */
204 h1 = 0x3.243f6a8885a3p+0; /* binary64 approximation of Pi */
205 l1 = 0x8.d313198a2e038p-56; /* |h1+l1-Pi| < 3e-33 */
206 /* Now we divide h1 + l1 by h2 + l2. */
207 div_expansion (&h1, &l1, h1, l1, h2, l2);
208 ret = __scalbn (-h1, -exp2_adj);
209 math_check_force_underflow_nonneg (ret);
210 }
211 }
212 ret = math_narrow_eval (ret);
213 }
214 if (isinf (ret) && x != 0)
215 {
216 if (*signgamp < 0)
217 {
218 ret = math_narrow_eval (-copysign (DBL_MAX, ret) * DBL_MAX);
219 ret = -ret;
220 }
221 else
222 ret = math_narrow_eval (copysign (DBL_MAX, ret) * DBL_MAX);
223 return ret;
224 }
225 else if (ret == 0)
226 {
227 if (*signgamp < 0)
228 {
229 ret = math_narrow_eval (-copysign (DBL_MIN, ret) * DBL_MIN);
230 ret = -ret;
231 }
232 else
233 ret = math_narrow_eval (copysign (DBL_MIN, ret) * DBL_MIN);
234 return ret;
235 }
236 else
237 return ret;
238}
239libm_alias_finite (__ieee754_gamma_r, __gamma_r)
240