s_fmal.c source code [glibc/sysdeps/ieee754/ldbl-96/s_fmal.c]

1	/ Compute x * y + z as ternary operation.*
2	Copyright (C) 2010-2022 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4
5	The GNU C Library is free software; you can redistribute it and/or
6	modify it under the terms of the GNU Lesser General Public
7	License as published by the Free Software Foundation; either
8	version 2.1 of the License, or (at your option) any later version.
9
10	The GNU C Library is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	Lesser General Public License for more details.
14
15	You should have received a copy of the GNU Lesser General Public
16	License along with the GNU C Library; if not, see
17	<https://www.gnu.org/licenses/>. /*
18
19	#define NO_MATH_REDIRECT
20	#include <float.h>
21	#include <math.h>
22	#include <fenv.h>
23	#include <ieee754.h>
24	#include <math-barriers.h>
25	#include <libm-alias-ldouble.h>
26	#include <tininess.h>
27
28	/ This implementation uses rounding to odd to avoid problems with*
29	double rounding. See a paper by Boldo and Melquiond:
30	http://www.lri.fr/~melquion/doc/08-tc.pdf /*
31
32	long double
33	__fmal (long double x, long double y, long double z)
34	{
35	union ieee854_long_double u, v, w;
36	int adjust = `0`;
37	u.d = x;
38	v.d = y;
39	w.d = z;
40	if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
41	>= `0x7fff` + IEEE854_LONG_DOUBLE_BIAS
42	- LDBL_MANT_DIG, `0`)
43	\|\| __builtin_expect (u.ieee.exponent >= `0x7fff` - LDBL_MANT_DIG, `0`)
44	\|\| __builtin_expect (v.ieee.exponent >= `0x7fff` - LDBL_MANT_DIG, `0`)
45	\|\| __builtin_expect (w.ieee.exponent >= `0x7fff` - LDBL_MANT_DIG, `0`)
46	\|\| __builtin_expect (u.ieee.exponent + v.ieee.exponent
47	<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, `0`))
48	{
49	/ If z is Inf, but x and y are finite, the result should be*
50	z rather than NaN. /*
51	if (w.ieee.exponent == `0x7fff`
52	&& u.ieee.exponent != `0x7fff`
53	&& v.ieee.exponent != `0x7fff`)
54	return (z + x) + y;
55	/ If z is zero and x are y are nonzero, compute the result*
56	as x y to avoid the wrong sign of a zero result if x * y*
57	underflows to 0. /*
58	if (z == `0` && x != `0` && y != `0`)
59	return x * y;
60	/ If x or y or z is Inf/NaN, or if x * y is zero, compute as*
61	x y + z. /
62	if (u.ieee.exponent == `0x7fff`
63	\|\| v.ieee.exponent == `0x7fff`
64	\|\| w.ieee.exponent == `0x7fff`
65	\|\| x == `0`
66	\|\| y == `0`)
67	return x * y + z;
68	/ If fma will certainly overflow, compute as x * y. /
69	if (u.ieee.exponent + v.ieee.exponent
70	> `0x7fff` + IEEE854_LONG_DOUBLE_BIAS)
71	return x * y;
72	/ If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the*
73	result nor whether there is underflow depends on its exact
74	value, only on its sign. /*
75	if (u.ieee.exponent + v.ieee.exponent
76	< IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - `2`)
77	{
78	int neg = u.ieee.negative ^ v.ieee.negative;
79	long double tiny = neg ? -`0x1p-16445L` : `0x1p-16445L`;
80	if (w.ieee.exponent >= `3`)
81	return tiny + z;
82	/ Scaling up, adding TINY and scaling down produces the*
83	correct result, because in round-to-nearest mode adding
84	TINY has no effect and in other modes double rounding is
85	harmless. But it may not produce required underflow
86	exceptions. /*
87	v.d = z * `0x1p65L` + tiny;
88	if (TININESS_AFTER_ROUNDING
89	? v.ieee.exponent < `66`
90	: (w.ieee.exponent == `0`
91	\|\| (w.ieee.exponent == `1`
92	&& w.ieee.negative != neg
93	&& w.ieee.mantissa1 == `0`
94	&& w.ieee.mantissa0 == `0x80000000`)))
95	{
96	long double force_underflow = x * y;
97	math_force_eval (force_underflow);
98	}
99	return v.d * `0x1p-65L`;
100	}
101	if (u.ieee.exponent + v.ieee.exponent
102	>= `0x7fff` + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
103	{
104	/ Compute 1p-64 times smaller result and multiply*
105	at the end. /*
106	if (u.ieee.exponent > v.ieee.exponent)
107	u.ieee.exponent -= LDBL_MANT_DIG;
108	else
109	v.ieee.exponent -= LDBL_MANT_DIG;
110	/ If x + y exponent is very large and z exponent is very small,*
111	it doesn't matter if we don't adjust it. /*
112	if (w.ieee.exponent > LDBL_MANT_DIG)
113	w.ieee.exponent -= LDBL_MANT_DIG;
114	adjust = `1`;
115	}
116	else if (w.ieee.exponent >= `0x7fff` - LDBL_MANT_DIG)
117	{
118	/ Similarly.*
119	If z exponent is very large and x and y exponents are
120	very small, adjust them up to avoid spurious underflows,
121	rather than down. /*
122	if (u.ieee.exponent + v.ieee.exponent
123	<= IEEE854_LONG_DOUBLE_BIAS + `2` * LDBL_MANT_DIG)
124	{
125	if (u.ieee.exponent > v.ieee.exponent)
126	u.ieee.exponent += `2` * LDBL_MANT_DIG + `2`;
127	else
128	v.ieee.exponent += `2` * LDBL_MANT_DIG + `2`;
129	}
130	else if (u.ieee.exponent > v.ieee.exponent)
131	{
132	if (u.ieee.exponent > LDBL_MANT_DIG)
133	u.ieee.exponent -= LDBL_MANT_DIG;
134	}
135	else if (v.ieee.exponent > LDBL_MANT_DIG)
136	v.ieee.exponent -= LDBL_MANT_DIG;
137	w.ieee.exponent -= LDBL_MANT_DIG;
138	adjust = `1`;
139	}
140	else if (u.ieee.exponent >= `0x7fff` - LDBL_MANT_DIG)
141	{
142	u.ieee.exponent -= LDBL_MANT_DIG;
143	if (v.ieee.exponent)
144	v.ieee.exponent += LDBL_MANT_DIG;
145	else
146	v.d *= `0x1p64L`;
147	}
148	else if (v.ieee.exponent >= `0x7fff` - LDBL_MANT_DIG)
149	{
150	v.ieee.exponent -= LDBL_MANT_DIG;
151	if (u.ieee.exponent)
152	u.ieee.exponent += LDBL_MANT_DIG;
153	else
154	u.d *= `0x1p64L`;
155	}
156	else / if (u.ieee.exponent + v.ieee.exponent*
157	<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) /*
158	{
159	if (u.ieee.exponent > v.ieee.exponent)
160	u.ieee.exponent += `2` * LDBL_MANT_DIG + `2`;
161	else
162	v.ieee.exponent += `2` * LDBL_MANT_DIG + `2`;
163	if (w.ieee.exponent <= `4` * LDBL_MANT_DIG + `6`)
164	{
165	if (w.ieee.exponent)
166	w.ieee.exponent += `2` * LDBL_MANT_DIG + `2`;
167	else
168	w.d *= `0x1p130L`;
169	adjust = -`1`;
170	}
171	/ Otherwise x * y should just affect inexact*
172	and nothing else. /*
173	}
174	x = u.d;
175	y = v.d;
176	z = w.d;
177	}
178
179	/ Ensure correct sign of exact 0 + 0. /
180	if (__glibc_unlikely ((x == `0` \|\| y == `0`) && z == `0`))
181	{
182	x = math_opt_barrier (x);
183	return x * y + z;
184	}
185
186	fenv_t env;
187	feholdexcept (&env);
188	fesetround (FE_TONEAREST);
189
190	/ Multiplication m1 + m2 = x * y using Dekker's algorithm. /
191	#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
192	long double x1 = x * C;
193	long double y1 = y * C;
194	long double m1 = x * y;
195	x1 = (x - x1) + x1;
196	y1 = (y - y1) + y1;
197	long double x2 = x - x1;
198	long double y2 = y - y1;
199	long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
200
201	/ Addition a1 + a2 = z + m1 using Knuth's algorithm. /
202	long double a1 = z + m1;
203	long double t1 = a1 - z;
204	long double t2 = a1 - t1;
205	t1 = m1 - t1;
206	t2 = z - t2;
207	long double a2 = t1 + t2;
208	/ Ensure the arithmetic is not scheduled after feclearexcept call. /
209	math_force_eval (m2);
210	math_force_eval (a2);
211	feclearexcept (FE_INEXACT);
212
213	/ If the result is an exact zero, ensure it has the correct sign. /
214	if (a1 == `0` && m2 == `0`)
215	{
216	feupdateenv (&env);
217	/ Ensure that round-to-nearest value of z + m1 is not reused. /
218	z = math_opt_barrier (z);
219	return z + m1;
220	}
221
222	fesetround (FE_TOWARDZERO);
223	/ Perform m2 + a2 addition with round to odd. /
224	u.d = a2 + m2;
225
226	if (__glibc_likely (adjust == `0`))
227	{
228	if ((u.ieee.mantissa1 & `1`) == `0` && u.ieee.exponent != `0x7fff`)
229	u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != `0`;
230	feupdateenv (&env);
231	/ Result is a1 + u.d. /
232	return a1 + u.d;
233	}
234	else if (__glibc_likely (adjust > `0`))
235	{
236	if ((u.ieee.mantissa1 & `1`) == `0` && u.ieee.exponent != `0x7fff`)
237	u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != `0`;
238	feupdateenv (&env);
239	/ Result is a1 + u.d, scaled up. /
240	return (a1 + u.d) * `0x1p64L`;
241	}
242	else
243	{
244	if ((u.ieee.mantissa1 & `1`) == `0`)
245	u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != `0`;
246	v.d = a1 + u.d;
247	/ Ensure the addition is not scheduled after fetestexcept call. /
248	math_force_eval (v.d);
249	int j = fetestexcept (FE_INEXACT) != `0`;
250	feupdateenv (&env);
251	/ Ensure the following computations are performed in default rounding*
252	mode instead of just reusing the round to zero computation. /*
253	asm volatile ("" : "=m" (u) : "m" (u));
254	/ If a1 + u.d is exact, the only rounding happens during*
255	scaling down. /*
256	if (j == `0`)
257	return v.d * `0x1p-130L`;
258	/ If result rounded to zero is not subnormal, no double*
259	rounding will occur. /*
260	if (v.ieee.exponent > `130`)
261	return (a1 + u.d) * `0x1p-130L`;
262	/ If v.d * 0x1p-130L with round to zero is a subnormal above*
263	or equal to LDBL_MIN / 2, then v.d 0x1p-130L shifts mantissa*
264	down just by 1 bit, which means v.ieee.mantissa1 \|= j would
265	change the round bit, not sticky or guard bit.
266	v.d 0x1p-130L never normalizes by shifting up,*
267	so round bit plus sticky bit should be already enough
268	for proper rounding. /*
269	if (v.ieee.exponent == `130`)
270	{
271	/ If the exponent would be in the normal range when*
272	rounding to normal precision with unbounded exponent
273	range, the exact result is known and spurious underflows
274	must be avoided on systems detecting tininess after
275	rounding. /*
276	if (TININESS_AFTER_ROUNDING)
277	{
278	w.d = a1 + u.d;
279	if (w.ieee.exponent == `131`)
280	return w.d * `0x1p-130L`;
281	}
282	/ v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,*
283	v.ieee.mantissa1 & 1 is the round bit and j is our sticky
284	bit. /*
285	w.d = `0.0L`;
286	w.ieee.mantissa1 = ((v.ieee.mantissa1 & `3`) << `1`) \| j;
287	w.ieee.negative = v.ieee.negative;
288	v.ieee.mantissa1 &= ~`3U`;
289	v.d *= `0x1p-130L`;
290	w.d *= `0x1p-2L`;
291	return v.d + w.d;
292	}
293	v.ieee.mantissa1 \|= j;
294	return v.d * `0x1p-130L`;
295	}
296	}
297	libm_alias_ldouble (__fma, fma)
298

Browse the source code of glibc/sysdeps/ieee754/ldbl-96/s_fmal.c