/* Compute x * y + z as a ternary operation.
2 | Copyright (C) 2010-2023 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #define NO_MATH_REDIRECT |
20 | #include <float.h> |
21 | #define f64xfmaf128 __hide_f64xfmaf128 |
22 | #include <math.h> |
23 | #undef f64xfmaf128 |
24 | #include <fenv.h> |
25 | #include <ieee754.h> |
26 | #include <math-barriers.h> |
27 | #include <math_private.h> |
28 | #include <libm-alias-ldouble.h> |
29 | #include <math-narrow-alias.h> |
30 | #include <tininess.h> |
31 | #include <math-use-builtins.h> |
32 | |
33 | /* This implementation uses rounding to odd to avoid problems with |
34 | double rounding. See a paper by Boldo and Melquiond: |
35 | http://www.lri.fr/~melquion/doc/08-tc.pdf */ |
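
/* Rounding to odd forces the least significant bit of an inexact
   intermediate result to 1.  That bit then acts as a sticky bit, so a
   later rounding to nearest of the intermediate value cannot go the
   wrong way on what looks like an exact or halfway case, which is what
   avoids the double-rounding problem.  Below this is implemented by
   computing a2 + m2 with rounding towards zero and or-ing the inexact
   flag into the low mantissa bit.  */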
36 | |
37 | _Float128 |
38 | __fmal (_Float128 x, _Float128 y, _Float128 z) |
39 | { |
40 | #if USE_FMAL_BUILTIN |
41 | return __builtin_fmal (x, y, z); |
42 | #else |
43 | union ieee854_long_double u, v, w; |
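  /* ADJUST records how the operands are rescaled below to keep the
     exact multiply-and-add steps free of overflow and underflow:
     1 means the intermediate result is 0x1p-113 times the true result
     and is multiplied by 0x1p113 at the end; -1 means it is 0x1p228
     times the true result and is multiplied by 0x1p-228 at the end.  */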
44 | int adjust = 0; |
45 | u.d = x; |
46 | v.d = y; |
47 | w.d = z; |
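  /* Handle operands whose exponents are near the limits of the format:
     Inf/NaN inputs, products that could overflow, very large z, and
     products small enough that x * y or its rounding error could
     underflow.  The common case falls through to the exact
     multiply-and-add sequence below.  */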
48 | if (__builtin_expect (u.ieee.exponent + v.ieee.exponent |
49 | >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS |
50 | - LDBL_MANT_DIG, 0) |
51 | || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0) |
52 | || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0) |
53 | || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0) |
54 | || __builtin_expect (u.ieee.exponent + v.ieee.exponent |
55 | <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0)) |
56 | { |
57 | /* If z is Inf, but x and y are finite, the result should be |
58 | z rather than NaN. */ |
59 | if (w.ieee.exponent == 0x7fff |
60 | && u.ieee.exponent != 0x7fff |
61 | && v.ieee.exponent != 0x7fff) |
62 | return (z + x) + y; |
      /* If z is zero and x and y are nonzero, compute the result
64 | as x * y to avoid the wrong sign of a zero result if x * y |
65 | underflows to 0. */ |
66 | if (z == 0 && x != 0 && y != 0) |
67 | return x * y; |
68 | /* If x or y or z is Inf/NaN, or if x * y is zero, compute as |
69 | x * y + z. */ |
70 | if (u.ieee.exponent == 0x7fff |
71 | || v.ieee.exponent == 0x7fff |
72 | || w.ieee.exponent == 0x7fff |
73 | || x == 0 |
74 | || y == 0) |
75 | return x * y + z; |
76 | /* If fma will certainly overflow, compute as x * y. */ |
77 | if (u.ieee.exponent + v.ieee.exponent |
78 | > 0x7fff + IEEE854_LONG_DOUBLE_BIAS) |
79 | return x * y; |
80 | /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the |
81 | result nor whether there is underflow depends on its exact |
82 | value, only on its sign. */ |
83 | if (u.ieee.exponent + v.ieee.exponent |
84 | < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) |
85 | { |
86 | int neg = u.ieee.negative ^ v.ieee.negative; |
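          /* TINY is LDBL_TRUE_MIN (the smallest subnormal _Float128),
             with the sign of x * y.  */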
87 | _Float128 tiny = neg ? L(-0x1p-16494) : L(0x1p-16494); |
88 | if (w.ieee.exponent >= 3) |
89 | return tiny + z; |
90 | /* Scaling up, adding TINY and scaling down produces the |
91 | correct result, because in round-to-nearest mode adding |
92 | TINY has no effect and in other modes double rounding is |
93 | harmless. But it may not produce required underflow |
94 | exceptions. */ |
95 | v.d = z * L(0x1p114) + tiny; |
96 | if (TININESS_AFTER_ROUNDING |
97 | ? v.ieee.exponent < 115 |
98 | : (w.ieee.exponent == 0 |
99 | || (w.ieee.exponent == 1 |
100 | && w.ieee.negative != neg |
101 | && w.ieee.mantissa3 == 0 |
102 | && w.ieee.mantissa2 == 0 |
103 | && w.ieee.mantissa1 == 0 |
104 | && w.ieee.mantissa0 == 0))) |
105 | { |
106 | _Float128 force_underflow = x * y; |
107 | math_force_eval (force_underflow); |
108 | } |
109 | return v.d * L(0x1p-114); |
110 | } |
111 | if (u.ieee.exponent + v.ieee.exponent |
112 | >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG) |
113 | { |
          /* Compute a result 0x1p-113 times smaller and scale it
             back up at the end.  */
116 | if (u.ieee.exponent > v.ieee.exponent) |
117 | u.ieee.exponent -= LDBL_MANT_DIG; |
118 | else |
119 | v.ieee.exponent -= LDBL_MANT_DIG; |
          /* If the exponent of x * y is very large and the exponent
             of z is very small, it does not matter that z is not
             adjusted.  */
122 | if (w.ieee.exponent > LDBL_MANT_DIG) |
123 | w.ieee.exponent -= LDBL_MANT_DIG; |
124 | adjust = 1; |
125 | } |
126 | else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG) |
127 | { |
          /* Similarly.  If the z exponent is very large and the x and
             y exponents are very small, adjust them up rather than
             down, to avoid spurious underflows.  */
132 | if (u.ieee.exponent + v.ieee.exponent |
133 | <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG) |
134 | { |
135 | if (u.ieee.exponent > v.ieee.exponent) |
136 | u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; |
137 | else |
138 | v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; |
139 | } |
140 | else if (u.ieee.exponent > v.ieee.exponent) |
141 | { |
142 | if (u.ieee.exponent > LDBL_MANT_DIG) |
143 | u.ieee.exponent -= LDBL_MANT_DIG; |
144 | } |
145 | else if (v.ieee.exponent > LDBL_MANT_DIG) |
146 | v.ieee.exponent -= LDBL_MANT_DIG; |
147 | w.ieee.exponent -= LDBL_MANT_DIG; |
148 | adjust = 1; |
149 | } |
150 | else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG) |
151 | { |
152 | u.ieee.exponent -= LDBL_MANT_DIG; |
153 | if (v.ieee.exponent) |
154 | v.ieee.exponent += LDBL_MANT_DIG; |
155 | else |
156 | v.d *= L(0x1p113); |
157 | } |
158 | else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG) |
159 | { |
160 | v.ieee.exponent -= LDBL_MANT_DIG; |
161 | if (u.ieee.exponent) |
162 | u.ieee.exponent += LDBL_MANT_DIG; |
163 | else |
164 | u.d *= L(0x1p113); |
165 | } |
166 | else /* if (u.ieee.exponent + v.ieee.exponent |
167 | <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */ |
168 | { |
169 | if (u.ieee.exponent > v.ieee.exponent) |
170 | u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; |
171 | else |
172 | v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; |
173 | if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6) |
174 | { |
175 | if (w.ieee.exponent) |
176 | w.ieee.exponent += 2 * LDBL_MANT_DIG + 2; |
177 | else |
178 | w.d *= L(0x1p228); |
179 | adjust = -1; |
180 | } |
181 | /* Otherwise x * y should just affect inexact |
182 | and nothing else. */ |
183 | } |
184 | x = u.d; |
185 | y = v.d; |
186 | z = w.d; |
187 | } |
188 | |
189 | /* Ensure correct sign of exact 0 + 0. */ |
190 | if (__glibc_unlikely ((x == 0 || y == 0) && z == 0)) |
191 | { |
192 | x = math_opt_barrier (x); |
193 | return x * y + z; |
194 | } |
195 | |
196 | fenv_t env; |
197 | feholdexcept (&env); |
198 | fesetround (FE_TONEAREST); |
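  /* Round to nearest is needed here because the error-free
     transformations below (Dekker's product and Knuth's two-sum) only
     deliver exact error terms in that rounding mode.  */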
199 | |
200 | /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ |
201 | #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) |
202 | _Float128 x1 = x * C; |
203 | _Float128 y1 = y * C; |
204 | _Float128 m1 = x * y; |
205 | x1 = (x - x1) + x1; |
206 | y1 = (y - y1) + y1; |
207 | _Float128 x2 = x - x1; |
208 | _Float128 y2 = y - y1; |
209 | _Float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2; |
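  /* m1 is x * y rounded to nearest and m2 is the exact rounding error,
     so m1 + m2 == x * y exactly; the scaling above keeps these
     intermediate products free of overflow and harmful underflow.  */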
210 | |
211 | /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */ |
212 | _Float128 a1 = z + m1; |
213 | _Float128 t1 = a1 - z; |
214 | _Float128 t2 = a1 - t1; |
215 | t1 = m1 - t1; |
216 | t2 = z - t2; |
217 | _Float128 a2 = t1 + t2; |
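  /* a1 is z + m1 rounded to nearest and a2 is the exact rounding error,
     so a1 + a2 == z + m1 exactly, with no assumption about the relative
     magnitudes of z and m1.  */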
  /* Ensure the arithmetic is not scheduled after the feclearexcept call.  */
219 | math_force_eval (m2); |
220 | math_force_eval (a2); |
221 | feclearexcept (FE_INEXACT); |
222 | |
223 | /* If the result is an exact zero, ensure it has the correct sign. */ |
224 | if (a1 == 0 && m2 == 0) |
225 | { |
226 | feupdateenv (&env); |
      /* Ensure that the round-to-nearest value of z + m1 is not reused.  */
228 | z = math_opt_barrier (z); |
229 | return z + m1; |
230 | } |
231 | |
232 | fesetround (FE_TOWARDZERO); |
233 | /* Perform m2 + a2 addition with round to odd. */ |
234 | u.d = a2 + m2; |
235 | |
236 | if (__glibc_likely (adjust == 0)) |
237 | { |
238 | if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff) |
239 | u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0; |
240 | feupdateenv (&env); |
241 | /* Result is a1 + u.d. */ |
242 | return a1 + u.d; |
243 | } |
244 | else if (__glibc_likely (adjust > 0)) |
245 | { |
246 | if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff) |
247 | u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0; |
248 | feupdateenv (&env); |
249 | /* Result is a1 + u.d, scaled up. */ |
250 | return (a1 + u.d) * L(0x1p113); |
251 | } |
252 | else |
253 | { |
254 | if ((u.ieee.mantissa3 & 1) == 0) |
255 | u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0; |
256 | v.d = a1 + u.d; |
      /* Ensure the addition is not scheduled after the fetestexcept call.  */
258 | math_force_eval (v.d); |
259 | int j = fetestexcept (FE_INEXACT) != 0; |
260 | feupdateenv (&env); |
261 | /* Ensure the following computations are performed in default rounding |
262 | mode instead of just reusing the round to zero computation. */ |
263 | asm volatile ("" : "=m" (u) : "m" (u)); |
264 | /* If a1 + u.d is exact, the only rounding happens during |
265 | scaling down. */ |
266 | if (j == 0) |
267 | return v.d * L(0x1p-228); |
268 | /* If result rounded to zero is not subnormal, no double |
269 | rounding will occur. */ |
270 | if (v.ieee.exponent > 228) |
271 | return (a1 + u.d) * L(0x1p-228); |
      /* If v.d * 0x1p-228L with round to zero is a subnormal greater
         than or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts the
         mantissa down by just 1 bit, which means v.ieee.mantissa3 |= j
         would change the round bit, not the sticky or guard bit.
         v.d * 0x1p-228L never normalizes by shifting up, so the round
         bit plus the sticky bit should already be enough for proper
         rounding.  */
279 | if (v.ieee.exponent == 228) |
280 | { |
281 | /* If the exponent would be in the normal range when |
282 | rounding to normal precision with unbounded exponent |
283 | range, the exact result is known and spurious underflows |
284 | must be avoided on systems detecting tininess after |
285 | rounding. */ |
286 | if (TININESS_AFTER_ROUNDING) |
287 | { |
288 | w.d = a1 + u.d; |
289 | if (w.ieee.exponent == 229) |
290 | return w.d * L(0x1p-228); |
291 | } |
          /* v.ieee.mantissa3 & 2 is the LSB of the result before
             rounding, v.ieee.mantissa3 & 1 is the round bit and j is
             our sticky bit.  */
295 | w.d = 0; |
296 | w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j; |
297 | w.ieee.negative = v.ieee.negative; |
298 | v.ieee.mantissa3 &= ~3U; |
299 | v.d *= L(0x1p-228); |
300 | w.d *= L(0x1p-2); |
301 | return v.d + w.d; |
302 | } |
303 | v.ieee.mantissa3 |= j; |
304 | return v.d * L(0x1p-228); |
305 | } |
306 | #endif /* ! USE_FMAL_BUILTIN */ |
307 | } |
308 | libm_alias_ldouble (__fma, fma) |
309 | libm_alias_ldouble_narrow (__fma, fma) |
310 | |