1/* Compute x * y + z as ternary operation.
2 Copyright (C) 2010-2021 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#include <float.h>
21#include <math.h>
22#include <fenv.h>
23#include <ieee754.h>
24#include <math-barriers.h>
25#include <math_private.h>
26#include <libm-alias-ldouble.h>
27#include <tininess.h>
28#include <math-use-builtins.h>
29
30/* This implementation uses rounding to odd to avoid problems with
31 double rounding. See a paper by Boldo and Melquiond:
32 http://www.lri.fr/~melquion/doc/08-tc.pdf */
33
34_Float128
35__fmal (_Float128 x, _Float128 y, _Float128 z)
36{
37#if USE_FMAL_BUILTIN
38 return __builtin_fmal (x, y, z);
39#else
40 union ieee854_long_double u, v, w;
41 int adjust = 0;
42 u.d = x;
43 v.d = y;
44 w.d = z;
45 if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
46 >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
47 - LDBL_MANT_DIG, 0)
48 || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
49 || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
50 || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
51 || __builtin_expect (u.ieee.exponent + v.ieee.exponent
52 <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
53 {
54 /* If z is Inf, but x and y are finite, the result should be
55 z rather than NaN. */
56 if (w.ieee.exponent == 0x7fff
57 && u.ieee.exponent != 0x7fff
58 && v.ieee.exponent != 0x7fff)
59 return (z + x) + y;
60 /* If z is zero and x are y are nonzero, compute the result
61 as x * y to avoid the wrong sign of a zero result if x * y
62 underflows to 0. */
63 if (z == 0 && x != 0 && y != 0)
64 return x * y;
65 /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
66 x * y + z. */
67 if (u.ieee.exponent == 0x7fff
68 || v.ieee.exponent == 0x7fff
69 || w.ieee.exponent == 0x7fff
70 || x == 0
71 || y == 0)
72 return x * y + z;
73 /* If fma will certainly overflow, compute as x * y. */
74 if (u.ieee.exponent + v.ieee.exponent
75 > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
76 return x * y;
77 /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
78 result nor whether there is underflow depends on its exact
79 value, only on its sign. */
80 if (u.ieee.exponent + v.ieee.exponent
81 < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
82 {
83 int neg = u.ieee.negative ^ v.ieee.negative;
84 _Float128 tiny = neg ? L(-0x1p-16494) : L(0x1p-16494);
85 if (w.ieee.exponent >= 3)
86 return tiny + z;
87 /* Scaling up, adding TINY and scaling down produces the
88 correct result, because in round-to-nearest mode adding
89 TINY has no effect and in other modes double rounding is
90 harmless. But it may not produce required underflow
91 exceptions. */
92 v.d = z * L(0x1p114) + tiny;
93 if (TININESS_AFTER_ROUNDING
94 ? v.ieee.exponent < 115
95 : (w.ieee.exponent == 0
96 || (w.ieee.exponent == 1
97 && w.ieee.negative != neg
98 && w.ieee.mantissa3 == 0
99 && w.ieee.mantissa2 == 0
100 && w.ieee.mantissa1 == 0
101 && w.ieee.mantissa0 == 0)))
102 {
103 _Float128 force_underflow = x * y;
104 math_force_eval (force_underflow);
105 }
106 return v.d * L(0x1p-114);
107 }
108 if (u.ieee.exponent + v.ieee.exponent
109 >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
110 {
111 /* Compute 1p-113 times smaller result and multiply
112 at the end. */
113 if (u.ieee.exponent > v.ieee.exponent)
114 u.ieee.exponent -= LDBL_MANT_DIG;
115 else
116 v.ieee.exponent -= LDBL_MANT_DIG;
117 /* If x + y exponent is very large and z exponent is very small,
118 it doesn't matter if we don't adjust it. */
119 if (w.ieee.exponent > LDBL_MANT_DIG)
120 w.ieee.exponent -= LDBL_MANT_DIG;
121 adjust = 1;
122 }
123 else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
124 {
125 /* Similarly.
126 If z exponent is very large and x and y exponents are
127 very small, adjust them up to avoid spurious underflows,
128 rather than down. */
129 if (u.ieee.exponent + v.ieee.exponent
130 <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
131 {
132 if (u.ieee.exponent > v.ieee.exponent)
133 u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
134 else
135 v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
136 }
137 else if (u.ieee.exponent > v.ieee.exponent)
138 {
139 if (u.ieee.exponent > LDBL_MANT_DIG)
140 u.ieee.exponent -= LDBL_MANT_DIG;
141 }
142 else if (v.ieee.exponent > LDBL_MANT_DIG)
143 v.ieee.exponent -= LDBL_MANT_DIG;
144 w.ieee.exponent -= LDBL_MANT_DIG;
145 adjust = 1;
146 }
147 else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
148 {
149 u.ieee.exponent -= LDBL_MANT_DIG;
150 if (v.ieee.exponent)
151 v.ieee.exponent += LDBL_MANT_DIG;
152 else
153 v.d *= L(0x1p113);
154 }
155 else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
156 {
157 v.ieee.exponent -= LDBL_MANT_DIG;
158 if (u.ieee.exponent)
159 u.ieee.exponent += LDBL_MANT_DIG;
160 else
161 u.d *= L(0x1p113);
162 }
163 else /* if (u.ieee.exponent + v.ieee.exponent
164 <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
165 {
166 if (u.ieee.exponent > v.ieee.exponent)
167 u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
168 else
169 v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
170 if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
171 {
172 if (w.ieee.exponent)
173 w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
174 else
175 w.d *= L(0x1p228);
176 adjust = -1;
177 }
178 /* Otherwise x * y should just affect inexact
179 and nothing else. */
180 }
181 x = u.d;
182 y = v.d;
183 z = w.d;
184 }
185
186 /* Ensure correct sign of exact 0 + 0. */
187 if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
188 {
189 x = math_opt_barrier (x);
190 return x * y + z;
191 }
192
193 fenv_t env;
194 feholdexcept (&env);
195 fesetround (FE_TONEAREST);
196
197 /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
198#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
199 _Float128 x1 = x * C;
200 _Float128 y1 = y * C;
201 _Float128 m1 = x * y;
202 x1 = (x - x1) + x1;
203 y1 = (y - y1) + y1;
204 _Float128 x2 = x - x1;
205 _Float128 y2 = y - y1;
206 _Float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
207
208 /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
209 _Float128 a1 = z + m1;
210 _Float128 t1 = a1 - z;
211 _Float128 t2 = a1 - t1;
212 t1 = m1 - t1;
213 t2 = z - t2;
214 _Float128 a2 = t1 + t2;
215 /* Ensure the arithmetic is not scheduled after feclearexcept call. */
216 math_force_eval (m2);
217 math_force_eval (a2);
218 feclearexcept (FE_INEXACT);
219
220 /* If the result is an exact zero, ensure it has the correct sign. */
221 if (a1 == 0 && m2 == 0)
222 {
223 feupdateenv (&env);
224 /* Ensure that round-to-nearest value of z + m1 is not reused. */
225 z = math_opt_barrier (z);
226 return z + m1;
227 }
228
229 fesetround (FE_TOWARDZERO);
230 /* Perform m2 + a2 addition with round to odd. */
231 u.d = a2 + m2;
232
233 if (__glibc_likely (adjust == 0))
234 {
235 if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
236 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
237 feupdateenv (&env);
238 /* Result is a1 + u.d. */
239 return a1 + u.d;
240 }
241 else if (__glibc_likely (adjust > 0))
242 {
243 if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
244 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
245 feupdateenv (&env);
246 /* Result is a1 + u.d, scaled up. */
247 return (a1 + u.d) * L(0x1p113);
248 }
249 else
250 {
251 if ((u.ieee.mantissa3 & 1) == 0)
252 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
253 v.d = a1 + u.d;
254 /* Ensure the addition is not scheduled after fetestexcept call. */
255 math_force_eval (v.d);
256 int j = fetestexcept (FE_INEXACT) != 0;
257 feupdateenv (&env);
258 /* Ensure the following computations are performed in default rounding
259 mode instead of just reusing the round to zero computation. */
260 asm volatile ("" : "=m" (u) : "m" (u));
261 /* If a1 + u.d is exact, the only rounding happens during
262 scaling down. */
263 if (j == 0)
264 return v.d * L(0x1p-228);
265 /* If result rounded to zero is not subnormal, no double
266 rounding will occur. */
267 if (v.ieee.exponent > 228)
268 return (a1 + u.d) * L(0x1p-228);
269 /* If v.d * 0x1p-228L with round to zero is a subnormal above
270 or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa
271 down just by 1 bit, which means v.ieee.mantissa3 |= j would
272 change the round bit, not sticky or guard bit.
273 v.d * 0x1p-228L never normalizes by shifting up,
274 so round bit plus sticky bit should be already enough
275 for proper rounding. */
276 if (v.ieee.exponent == 228)
277 {
278 /* If the exponent would be in the normal range when
279 rounding to normal precision with unbounded exponent
280 range, the exact result is known and spurious underflows
281 must be avoided on systems detecting tininess after
282 rounding. */
283 if (TININESS_AFTER_ROUNDING)
284 {
285 w.d = a1 + u.d;
286 if (w.ieee.exponent == 229)
287 return w.d * L(0x1p-228);
288 }
289 /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
290 v.ieee.mantissa3 & 1 is the round bit and j is our sticky
291 bit. */
292 w.d = 0;
293 w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
294 w.ieee.negative = v.ieee.negative;
295 v.ieee.mantissa3 &= ~3U;
296 v.d *= L(0x1p-228);
297 w.d *= L(0x1p-2);
298 return v.d + w.d;
299 }
300 v.ieee.mantissa3 |= j;
301 return v.d * L(0x1p-228);
302 }
303#endif /* ! USE_FMAL_BUILTIN */
304}
305libm_alias_ldouble (__fma, fma)
306