s_fma.c source code [glibc/sysdeps/ieee754/dbl-64/s_fma.c]

/* Compute x * y + z as ternary operation.
   Copyright (C) 2010-2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include <float.h>
21	#include <math.h>
22	#include <fenv.h>
23	#include <ieee754.h>
24	#include <math-barriers.h>
25	#include <fenv_private.h>
26	#include <libm-alias-double.h>
27	#include <tininess.h>
28	#include <math-use-builtins.h>
29
/* This implementation uses rounding to odd to avoid problems with
   double rounding.  See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */
33
34	double
35	__fma (double x, double y, double z)
36	{
37	#if USE_FMA_BUILTIN
38	return __builtin_fma (x, y, z);
39	#else
40	/ Use generic implementation. /
41	union ieee754_double u, v, w;
42	int adjust = `0`;
43	u.d = x;
44	v.d = y;
45	w.d = z;
46	if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
47	>= `0x7ff` + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, `0`)
48	\|\| __builtin_expect (u.ieee.exponent >= `0x7ff` - DBL_MANT_DIG, `0`)
49	\|\| __builtin_expect (v.ieee.exponent >= `0x7ff` - DBL_MANT_DIG, `0`)
50	\|\| __builtin_expect (w.ieee.exponent >= `0x7ff` - DBL_MANT_DIG, `0`)
51	\|\| __builtin_expect (u.ieee.exponent + v.ieee.exponent
52	<= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, `0`))
53	{
54	/ If z is Inf, but x and y are finite, the result should be*
55	z rather than NaN. /*
56	if (w.ieee.exponent == `0x7ff`
57	&& u.ieee.exponent != `0x7ff`
58	&& v.ieee.exponent != `0x7ff`)
59	return (z + x) + y;
60	/ If z is zero and x are y are nonzero, compute the result*
61	as x y to avoid the wrong sign of a zero result if x * y*
62	underflows to 0. /*
63	if (z == `0` && x != `0` && y != `0`)
64	return x * y;
65	/ If x or y or z is Inf/NaN, or if x * y is zero, compute as*
66	x y + z. /
67	if (u.ieee.exponent == `0x7ff`
68	\|\| v.ieee.exponent == `0x7ff`
69	\|\| w.ieee.exponent == `0x7ff`
70	\|\| x == `0`
71	\|\| y == `0`)
72	return x * y + z;
73	/ If fma will certainly overflow, compute as x * y. /
74	if (u.ieee.exponent + v.ieee.exponent > `0x7ff` + IEEE754_DOUBLE_BIAS)
75	return x * y;
76	/ If x * y is less than 1/4 of DBL_TRUE_MIN, neither the*
77	result nor whether there is underflow depends on its exact
78	value, only on its sign. /*
79	if (u.ieee.exponent + v.ieee.exponent
80	< IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - `2`)
81	{
82	int neg = u.ieee.negative ^ v.ieee.negative;
83	double tiny = neg ? -`0x1p-1074` : `0x1p-1074`;
84	if (w.ieee.exponent >= `3`)
85	return tiny + z;
86	/ Scaling up, adding TINY and scaling down produces the*
87	correct result, because in round-to-nearest mode adding
88	TINY has no effect and in other modes double rounding is
89	harmless. But it may not produce required underflow
90	exceptions. /*
91	v.d = z * `0x1p54` + tiny;
92	if (TININESS_AFTER_ROUNDING
93	? v.ieee.exponent < `55`
94	: (w.ieee.exponent == `0`
95	\|\| (w.ieee.exponent == `1`
96	&& w.ieee.negative != neg
97	&& w.ieee.mantissa1 == `0`
98	&& w.ieee.mantissa0 == `0`)))
99	{
100	double force_underflow = x * y;
101	math_force_eval (force_underflow);
102	}
103	return v.d * `0x1p-54`;
104	}
105	if (u.ieee.exponent + v.ieee.exponent
106	>= `0x7ff` + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
107	{
108	/ Compute 1p-53 times smaller result and multiply*
109	at the end. /*
110	if (u.ieee.exponent > v.ieee.exponent)
111	u.ieee.exponent -= DBL_MANT_DIG;
112	else
113	v.ieee.exponent -= DBL_MANT_DIG;
114	/ If x + y exponent is very large and z exponent is very small,*
115	it doesn't matter if we don't adjust it. /*
116	if (w.ieee.exponent > DBL_MANT_DIG)
117	w.ieee.exponent -= DBL_MANT_DIG;
118	adjust = `1`;
119	}
120	else if (w.ieee.exponent >= `0x7ff` - DBL_MANT_DIG)
121	{
122	/ Similarly.*
123	If z exponent is very large and x and y exponents are
124	very small, adjust them up to avoid spurious underflows,
125	rather than down. /*
126	if (u.ieee.exponent + v.ieee.exponent
127	<= IEEE754_DOUBLE_BIAS + `2` * DBL_MANT_DIG)
128	{
129	if (u.ieee.exponent > v.ieee.exponent)
130	u.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
131	else
132	v.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
133	}
134	else if (u.ieee.exponent > v.ieee.exponent)
135	{
136	if (u.ieee.exponent > DBL_MANT_DIG)
137	u.ieee.exponent -= DBL_MANT_DIG;
138	}
139	else if (v.ieee.exponent > DBL_MANT_DIG)
140	v.ieee.exponent -= DBL_MANT_DIG;
141	w.ieee.exponent -= DBL_MANT_DIG;
142	adjust = `1`;
143	}
144	else if (u.ieee.exponent >= `0x7ff` - DBL_MANT_DIG)
145	{
146	u.ieee.exponent -= DBL_MANT_DIG;
147	if (v.ieee.exponent)
148	v.ieee.exponent += DBL_MANT_DIG;
149	else
150	v.d *= `0x1p53`;
151	}
152	else if (v.ieee.exponent >= `0x7ff` - DBL_MANT_DIG)
153	{
154	v.ieee.exponent -= DBL_MANT_DIG;
155	if (u.ieee.exponent)
156	u.ieee.exponent += DBL_MANT_DIG;
157	else
158	u.d *= `0x1p53`;
159	}
160	else / if (u.ieee.exponent + v.ieee.exponent*
161	<= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) /*
162	{
163	if (u.ieee.exponent > v.ieee.exponent)
164	u.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
165	else
166	v.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
167	if (w.ieee.exponent <= `4` * DBL_MANT_DIG + `6`)
168	{
169	if (w.ieee.exponent)
170	w.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
171	else
172	w.d *= `0x1p108`;
173	adjust = -`1`;
174	}
175	/ Otherwise x * y should just affect inexact*
176	and nothing else. /*
177	}
178	x = u.d;
179	y = v.d;
180	z = w.d;
181	}
182
183	/ Ensure correct sign of exact 0 + 0. /
184	if (__glibc_unlikely ((x == `0` \|\| y == `0`) && z == `0`))
185	{
186	x = math_opt_barrier (x);
187	return x * y + z;
188	}
189
190	fenv_t env;
191	libc_feholdexcept_setround (&env, FE_TONEAREST);
192
193	/ Multiplication m1 + m2 = x * y using Dekker's algorithm. /
194	#define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
195	double x1 = x * C;
196	double y1 = y * C;
197	double m1 = x * y;
198	x1 = (x - x1) + x1;
199	y1 = (y - y1) + y1;
200	double x2 = x - x1;
201	double y2 = y - y1;
202	double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
203
204	/ Addition a1 + a2 = z + m1 using Knuth's algorithm. /
205	double a1 = z + m1;
206	double t1 = a1 - z;
207	double t2 = a1 - t1;
208	t1 = m1 - t1;
209	t2 = z - t2;
210	double a2 = t1 + t2;
211	/ Ensure the arithmetic is not scheduled after feclearexcept call. /
212	math_force_eval (m2);
213	math_force_eval (a2);
214	feclearexcept (FE_INEXACT);
215
216	/ If the result is an exact zero, ensure it has the correct sign. /
217	if (a1 == `0` && m2 == `0`)
218	{
219	libc_feupdateenv (&env);
220	/ Ensure that round-to-nearest value of z + m1 is not reused. /
221	z = math_opt_barrier (z);
222	return z + m1;
223	}
224
225	libc_fesetround (FE_TOWARDZERO);
226
227	/ Perform m2 + a2 addition with round to odd. /
228	u.d = a2 + m2;
229
230	if (__glibc_unlikely (adjust < `0`))
231	{
232	if ((u.ieee.mantissa1 & `1`) == `0`)
233	u.ieee.mantissa1 \|= libc_fetestexcept (FE_INEXACT) != `0`;
234	v.d = a1 + u.d;
235	/ Ensure the addition is not scheduled after fetestexcept call. /
236	math_force_eval (v.d);
237	}
238
239	/ Reset rounding mode and test for inexact simultaneously. /
240	int j = libc_feupdateenv_test (&env, FE_INEXACT) != `0`;
241
242	if (__glibc_likely (adjust == `0`))
243	{
244	if ((u.ieee.mantissa1 & `1`) == `0` && u.ieee.exponent != `0x7ff`)
245	u.ieee.mantissa1 \|= j;
246	/ Result is a1 + u.d. /
247	return a1 + u.d;
248	}
249	else if (__glibc_likely (adjust > `0`))
250	{
251	if ((u.ieee.mantissa1 & `1`) == `0` && u.ieee.exponent != `0x7ff`)
252	u.ieee.mantissa1 \|= j;
253	/ Result is a1 + u.d, scaled up. /
254	return (a1 + u.d) * `0x1p53`;
255	}
256	else
257	{
258	/ If a1 + u.d is exact, the only rounding happens during*
259	scaling down. /*
260	if (j == `0`)
261	return v.d * `0x1p-108`;
262	/ If result rounded to zero is not subnormal, no double*
263	rounding will occur. /*
264	if (v.ieee.exponent > `108`)
265	return (a1 + u.d) * `0x1p-108`;
266	/ If v.d * 0x1p-108 with round to zero is a subnormal above*
267	or equal to DBL_MIN / 2, then v.d 0x1p-108 shifts mantissa*
268	down just by 1 bit, which means v.ieee.mantissa1 \|= j would
269	change the round bit, not sticky or guard bit.
270	v.d 0x1p-108 never normalizes by shifting up,*
271	so round bit plus sticky bit should be already enough
272	for proper rounding. /*
273	if (v.ieee.exponent == `108`)
274	{
275	/ If the exponent would be in the normal range when*
276	rounding to normal precision with unbounded exponent
277	range, the exact result is known and spurious underflows
278	must be avoided on systems detecting tininess after
279	rounding. /*
280	if (TININESS_AFTER_ROUNDING)
281	{
282	w.d = a1 + u.d;
283	if (w.ieee.exponent == `109`)
284	return w.d * `0x1p-108`;
285	}
286	/ v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,*
287	v.ieee.mantissa1 & 1 is the round bit and j is our sticky
288	bit. /*
289	w.d = `0.0`;
290	w.ieee.mantissa1 = ((v.ieee.mantissa1 & `3`) << `1`) \| j;
291	w.ieee.negative = v.ieee.negative;
292	v.ieee.mantissa1 &= ~`3U`;
293	v.d *= `0x1p-108`;
294	w.d *= `0x1p-2`;
295	return v.d + w.d;
296	}
297	v.ieee.mantissa1 \|= j;
298	return v.d * `0x1p-108`;
299	}
300	#endif /* ! USE_FMA_BUILTIN */
301	}
302	#ifndef __fma
303	libm_alias_double (__fma, fma)
304	#endif
305

Browse the source code of glibc/sysdeps/ieee754/dbl-64/s_fma.c