/* Compute x * y + z as ternary operation.
   Copyright (C) 2010-2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <float.h>
#include <math.h>
#include <fenv.h>
#include <ieee754.h>
#include <math-barriers.h>
#include <math_private.h>
#include <libm-alias-ldouble.h>
#include <tininess.h>
#include <math-use-builtins.h>

/* This implementation uses rounding to odd to avoid problems with
   double rounding.  See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */
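
/* As an illustration of the round-to-odd idea used below: the low part of
   the result is computed with the rounding mode set to FE_TOWARDZERO and,
   if that computation was inexact, its least significant mantissa bit is
   forced to 1 ("round to odd").  The single final rounding of high + low
   in the caller's mode is then immune to double rounding (see the paper
   cited above).  The sketch below shows the same idea for a plain double
   addition; it is kept under "#if 0", is not part of the implementation,
   and the helper name is purely illustrative.  */
#if 0
static double
illustrate_add_round_to_odd (double x, double y)
{
  fenv_t env;
  union ieee754_double u;
  feholdexcept (&env);
  fesetround (FE_TOWARDZERO);
  u.d = x + y;
  /* Make sure the addition is not scheduled after fetestexcept.  */
  math_force_eval (u.d);
  /* If the truncated sum lost bits, make its last mantissa bit odd.  */
  if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
    u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
  feupdateenv (&env);
  return u.d;
}
#endif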

_Float128
__fmal (_Float128 x, _Float128 y, _Float128 z)
{
#if USE_FMAL_BUILTIN
  return __builtin_fmal (x, y, z);
#else
  union ieee854_long_double u, v, w;
  int adjust = 0;
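  /* adjust records how the inputs are rescaled when their exponents are
     close to the limits: adjust > 0 means x * y + z is computed 0x1p-113
     times too small and scaled back up at the end, adjust < 0 means it is
     computed 0x1p228 times too large and scaled back down.  */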
  u.d = x;
  v.d = y;
  w.d = z;
  if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
                        >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
                           - LDBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent + v.ieee.exponent
                           <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
    {
      /* If z is Inf, but x and y are finite, the result should be
         z rather than NaN.  */
      if (w.ieee.exponent == 0x7fff
          && u.ieee.exponent != 0x7fff
          && v.ieee.exponent != 0x7fff)
        return (z + x) + y;
      /* If z is zero and x and y are nonzero, compute the result
         as x * y to avoid the wrong sign of a zero result if x * y
         underflows to 0.  */
      if (z == 0 && x != 0 && y != 0)
        return x * y;
      /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
         x * y + z.  */
      if (u.ieee.exponent == 0x7fff
          || v.ieee.exponent == 0x7fff
          || w.ieee.exponent == 0x7fff
          || x == 0
          || y == 0)
        return x * y + z;
      /* If fma will certainly overflow, compute as x * y.  */
      if (u.ieee.exponent + v.ieee.exponent
          > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
        return x * y;
      /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
         result nor whether there is underflow depends on its exact
         value, only on its sign.  */
      if (u.ieee.exponent + v.ieee.exponent
          < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
        {
          int neg = u.ieee.negative ^ v.ieee.negative;
          _Float128 tiny = neg ? L(-0x1p-16494) : L(0x1p-16494);
          if (w.ieee.exponent >= 3)
            return tiny + z;
          /* Scaling up, adding TINY and scaling down produces the
             correct result, because in round-to-nearest mode adding
             TINY has no effect and in other modes double rounding is
             harmless.  But it may not produce required underflow
             exceptions.  */
          v.d = z * L(0x1p114) + tiny;
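          /* The underflow exception is still owed whenever the exact
             result is tiny (it is always inexact here).  When tininess is
             detected after rounding, that is the case when the scaled sum
             stays below 0x1p114 * LDBL_MIN; when it is detected before
             rounding, it is the case when z is subnormal, or when z is
             exactly +/- LDBL_MIN and the product pulls it towards zero.
             Evaluate x * y then, purely for its underflow exception.  */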
          if (TININESS_AFTER_ROUNDING
              ? v.ieee.exponent < 115
              : (w.ieee.exponent == 0
                 || (w.ieee.exponent == 1
                     && w.ieee.negative != neg
                     && w.ieee.mantissa3 == 0
                     && w.ieee.mantissa2 == 0
                     && w.ieee.mantissa1 == 0
                     && w.ieee.mantissa0 == 0)))
            {
              _Float128 force_underflow = x * y;
              math_force_eval (force_underflow);
            }
          return v.d * L(0x1p-114);
        }
      if (u.ieee.exponent + v.ieee.exponent
          >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
        {
          /* Compute 0x1p-113 times smaller result and multiply
             at the end.  */
          if (u.ieee.exponent > v.ieee.exponent)
            u.ieee.exponent -= LDBL_MANT_DIG;
          else
            v.ieee.exponent -= LDBL_MANT_DIG;
          /* If the x and y exponents together are very large and the z
             exponent is very small, it doesn't matter if we don't
             adjust it.  */
          if (w.ieee.exponent > LDBL_MANT_DIG)
            w.ieee.exponent -= LDBL_MANT_DIG;
          adjust = 1;
        }
      else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
        {
          /* Similarly.
             If z exponent is very large and x and y exponents are
             very small, adjust them up to avoid spurious underflows,
             rather than down.  */
          if (u.ieee.exponent + v.ieee.exponent
              <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
            {
              if (u.ieee.exponent > v.ieee.exponent)
                u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
              else
                v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
            }
          else if (u.ieee.exponent > v.ieee.exponent)
            {
              if (u.ieee.exponent > LDBL_MANT_DIG)
                u.ieee.exponent -= LDBL_MANT_DIG;
            }
          else if (v.ieee.exponent > LDBL_MANT_DIG)
            v.ieee.exponent -= LDBL_MANT_DIG;
          w.ieee.exponent -= LDBL_MANT_DIG;
          adjust = 1;
        }
      else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
        {
          u.ieee.exponent -= LDBL_MANT_DIG;
          if (v.ieee.exponent)
            v.ieee.exponent += LDBL_MANT_DIG;
          else
            v.d *= L(0x1p113);
        }
      else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
        {
          v.ieee.exponent -= LDBL_MANT_DIG;
          if (u.ieee.exponent)
            u.ieee.exponent += LDBL_MANT_DIG;
          else
            u.d *= L(0x1p113);
        }
      else /* if (u.ieee.exponent + v.ieee.exponent
                  <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
        {
          if (u.ieee.exponent > v.ieee.exponent)
            u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
          else
            v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
          if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
            {
              if (w.ieee.exponent)
                w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
              else
                w.d *= L(0x1p228);
              adjust = -1;
            }
          /* Otherwise x * y should just affect inexact
             and nothing else.  */
        }
      x = u.d;
      y = v.d;
      z = w.d;
    }

  /* Ensure correct sign of exact 0 + 0.  */
  if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
    {
      x = math_opt_barrier (x);
      return x * y + z;
    }

  fenv_t env;
  feholdexcept (&env);
  fesetround (FE_TONEAREST);
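  /* The error-free transformations below (Dekker's product and Knuth's
     two-sum) are guaranteed only in round-to-nearest, so that mode is
     forced here; feupdateenv later restores the caller's rounding mode and
     exception flags before the final operations that produce the return
     value.  */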

  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
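  /* With LDBL_MANT_DIG == 113, C is 2^57 + 1.  The Veltkamp splitting below
     cuts x and y into halves whose pairwise products x1 * y1, x1 * y2,
     x2 * y1 and x2 * y2 are all exact, so m1 + m2 equals x * y exactly
     (the exponent checks above have already ruled out overflow and harmful
     underflow of the partial products).  */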
  _Float128 x1 = x * C;
  _Float128 y1 = y * C;
  _Float128 m1 = x * y;
  x1 = (x - x1) + x1;
  y1 = (y - y1) + y1;
  _Float128 x2 = x - x1;
  _Float128 y2 = y - y1;
  _Float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;

  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
  _Float128 a1 = z + m1;
  _Float128 t1 = a1 - z;
  _Float128 t2 = a1 - t1;
  t1 = m1 - t1;
  t2 = z - t2;
  _Float128 a2 = t1 + t2;
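  /* a1 is the rounded sum and a2 the exact error term, so a1 + a2 equals
     z + m1 exactly; unlike Fast2Sum, Knuth's two-sum does not require
     knowing which of z and m1 is larger in magnitude.  */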
  /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
  math_force_eval (m2);
  math_force_eval (a2);
  feclearexcept (FE_INEXACT);

  /* If the result is an exact zero, ensure it has the correct sign.  */
  if (a1 == 0 && m2 == 0)
    {
      feupdateenv (&env);
      /* Ensure that round-to-nearest value of z + m1 is not reused.  */
      z = math_opt_barrier (z);
      return z + m1;
    }

  fesetround (FE_TOWARDZERO);
  /* Perform m2 + a2 addition with round to odd.  */
  u.d = a2 + m2;
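  /* u.d now holds the two low-order terms added with truncation.  Below,
     whenever that addition raised FE_INEXACT, the least significant
     mantissa bit of u.d is forced to 1, which completes the round to odd
     step; the final addition of a1 and u.d in the caller's rounding mode,
     together with the scaling recorded in adjust, then matches a single
     correctly rounded x * y + z (see the reference cited at the top).  */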

  if (__glibc_likely (adjust == 0))
    {
      if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
        u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Result is a1 + u.d.  */
      return a1 + u.d;
    }
  else if (__glibc_likely (adjust > 0))
    {
      if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
        u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Result is a1 + u.d, scaled up.  */
      return (a1 + u.d) * L(0x1p113);
    }
  else
    {
      if ((u.ieee.mantissa3 & 1) == 0)
        u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
      v.d = a1 + u.d;
      /* Ensure the addition is not scheduled after fetestexcept call.  */
      math_force_eval (v.d);
      int j = fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Ensure the following computations are performed in default rounding
         mode instead of just reusing the round to zero computation.  */
      asm volatile ("" : "=m" (u) : "m" (u));
      /* If a1 + u.d is exact, the only rounding happens during
         scaling down.  */
      if (j == 0)
        return v.d * L(0x1p-228);
      /* If result rounded to zero is not subnormal, no double
         rounding will occur.  */
      if (v.ieee.exponent > 228)
        return (a1 + u.d) * L(0x1p-228);
      /* If v.d * 0x1p-228L with round to zero is a subnormal above
         or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa
         down just by 1 bit, which means v.ieee.mantissa3 |= j would
         change the round bit, not sticky or guard bit.
         v.d * 0x1p-228L never normalizes by shifting up,
         so round bit plus sticky bit should be already enough
         for proper rounding.  */
      if (v.ieee.exponent == 228)
        {
          /* If the exponent would be in the normal range when
             rounding to normal precision with unbounded exponent
             range, the exact result is known and spurious underflows
             must be avoided on systems detecting tininess after
             rounding.  */
          if (TININESS_AFTER_ROUNDING)
            {
              w.d = a1 + u.d;
              if (w.ieee.exponent == 229)
                return w.d * L(0x1p-228);
            }
          /* v.ieee.mantissa3 & 2 is the least significant bit of the result
             before rounding, v.ieee.mantissa3 & 1 is the round bit and j is
             our sticky bit.  */
          w.d = 0;
          w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
          w.ieee.negative = v.ieee.negative;
          v.ieee.mantissa3 &= ~3U;
          v.d *= L(0x1p-228);
          w.d *= L(0x1p-2);
          return v.d + w.d;
        }
      v.ieee.mantissa3 |= j;
      return v.d * L(0x1p-228);
    }
#endif /* ! USE_FMAL_BUILTIN */
}
libm_alias_ldouble (__fma, fma)