e_powl.c source code [glibc/sysdeps/ieee754/ldbl-128/e_powl.c]

1	/*
2	* ====================================================
3	* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4	*
5	* Developed at SunPro, a Sun Microsystems, Inc. business.
6	* Permission to use, copy, modify, and distribute this
7	* software is freely granted, provided that this notice
8	* is preserved.
9	* ====================================================
10	*/
11
12	/ Expansions and modifications for 128-bit long double are*
13	Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14	and are incorporated herein by permission of the author. The author
15	reserves the right to distribute this material elsewhere under different
16	copying permissions. These modifications are distributed here under
17	the following terms:
18
19	This library is free software; you can redistribute it and/or
20	modify it under the terms of the GNU Lesser General Public
21	License as published by the Free Software Foundation; either
22	version 2.1 of the License, or (at your option) any later version.
23
24	This library is distributed in the hope that it will be useful,
25	but WITHOUT ANY WARRANTY; without even the implied warranty of
26	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27	Lesser General Public License for more details.
28
29	You should have received a copy of the GNU Lesser General Public
30	License along with this library; if not, see
31	<http://www.gnu.org/licenses/>. /*
32
33	/ __ieee754_powl(x,y) return x*y
34	*
35	* n
36	* Method: Let x = 2 * (1+f)
37	* 1. Compute and return log2(x) in two pieces:
38	* log2(x) = w1 + w2,
39	* where w1 has 113-53 = 60 bit trailing zeros.
40	* 2. Perform y*log2(x) = n+y' by simulating muti-precision
41	* arithmetic, where \|y'\|<=0.5.
42	* 3. Return xy = 2nexp(y'log2)
43	*
44	* Special cases:
45	* 1. (anything) ** 0 is 1
46	* 2. (anything) ** 1 is itself
47	* 3. (anything) ** NAN is NAN
48	* 4. NAN ** (anything except 0) is NAN
49	* 5. +-(\|x\| > 1) ** +INF is +INF
50	* 6. +-(\|x\| > 1) ** -INF is +0
51	* 7. +-(\|x\| < 1) ** +INF is +0
52	* 8. +-(\|x\| < 1) ** -INF is +INF
53	* 9. +-1 ** +-INF is NAN
54	* 10. +0 ** (+anything except 0, NAN) is +0
55	* 11. -0 ** (+anything except 0, NAN, odd integer) is +0
56	* 12. +0 ** (-anything except 0, NAN) is +INF
57	* 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
58	* 14. -0 (odd integer) = -( +0 (odd integer) )
59	* 15. +INF ** (+anything except 0,NAN) is +INF
60	* 16. +INF ** (-anything except 0,NAN) is +0
61	* 17. -INF (anything) = -0 (-anything)
62	* 18. (-anything) (integer) is (-1)(integer)(+anything*integer)
63	* 19. (-anything except 0 and inf) ** (non-integer) is NAN
64	*
65	*/
66
67	#include <math.h>
68	#include <math_private.h>
69
70	static const _Float128 bp[] = {
71	`1`,
72	L(`1.5`),
73	};
74
75	/ log_2(1.5) /
76	static const _Float128 dp_h[] = {
77	`0.0`,
78	L(`5.8496250072115607565592654282227158546448E-1`)
79	};
80
81	/ Low part of log_2(1.5) /
82	static const _Float128 dp_l[] = {
83	`0.0`,
84	L(`1.0579781240112554492329533686862998106046E-16`)
85	};
86
87	static const _Float128 zero = `0`,
88	one = `1`,
89	two = `2`,
90	two113 = L(`1.0384593717069655257060992658440192E34`),
91	huge = L(`1.0e3000`),
92	tiny = L(`1.0e-3000`);
93
94	/ 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))*
95	z = (x-1)/(x+1)
96	1 <= x <= 1.25
97	Peak relative error 2.3e-37 /*
98	static const _Float128 LN[] =
99	{
100	L(-`3.0779177200290054398792536829702930623200E1`),
101	L(`6.5135778082209159921251824580292116201640E1`),
102	L(-`4.6312921812152436921591152809994014413540E1`),
103	L(`1.2510208195629420304615674658258363295208E1`),
104	L(-`9.9266909031921425609179910128531667336670E-1`)
105	};
106	static const _Float128 LD[] =
107	{
108	L(-`5.129862866715009066465422805058933131960E1`),
109	L(`1.452015077564081884387441590064272782044E2`),
110	L(-`1.524043275549860505277434040464085593165E2`),
111	L(`7.236063513651544224319663428634139768808E1`),
112	L(-`1.494198912340228235853027849917095580053E1`)
113	/ 1.0E0 /
114	};
115
116	/ exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))*
117	0 <= x <= 0.5
118	Peak relative error 5.7e-38 /*
119	static const _Float128 PN[] =
120	{
121	L(`5.081801691915377692446852383385968225675E8`),
122	L(`9.360895299872484512023336636427675327355E6`),
123	L(`4.213701282274196030811629773097579432957E4`),
124	L(`5.201006511142748908655720086041570288182E1`),
125	L(`9.088368420359444263703202925095675982530E-3`),
126	};
127	static const _Float128 PD[] =
128	{
129	L(`3.049081015149226615468111430031590411682E9`),
130	L(`1.069833887183886839966085436512368982758E8`),
131	L(`8.259257717868875207333991924545445705394E5`),
132	L(`1.872583833284143212651746812884298360922E3`),
133	/ 1.0E0 /
134	};
135
136	static const _Float128
137	/ ln 2 /
138	lg2 = L(`6.9314718055994530941723212145817656807550E-1`),
139	lg2_h = L(`6.9314718055994528622676398299518041312695E-1`),
140	lg2_l = L(`2.3190468138462996154948554638754786504121E-17`),
141	ovt = L(`8.0085662595372944372e-0017`),
142	/ 2/(3log(2)) /*
143	cp = L(`9.6179669392597560490661645400126142495110E-1`),
144	cp_h = L(`9.6179669392597555432899980587535537779331E-1`),
145	cp_l = L(`5.0577616648125906047157785230014751039424E-17`);
146
147	_Float128
148	__ieee754_powl (_Float128 x, _Float128 y)
149	{
150	_Float128 z, ax, z_h, z_l, p_h, p_l;
151	_Float128 y1, t1, t2, r, s, sgn, t, u, v, w;
152	_Float128 s2, s_h, s_l, t_h, t_l, ay;
153	int32_t i, j, k, yisint, n;
154	u_int32_t ix, iy;
155	int32_t hx, hy;
156	ieee854_long_double_shape_type o, p, q;
157
158	p.value = x;
159	hx = p.parts32.w0;
160	ix = hx & `0x7fffffff`;
161
162	q.value = y;
163	hy = q.parts32.w0;
164	iy = hy & `0x7fffffff`;
165
166
167	/ y==zero: x*0 = 1 /*
168	if ((iy \| q.parts32.w1 \| q.parts32.w2 \| q.parts32.w3) == `0`
169	&& !issignaling (x))
170	return one;
171
172	/ 1.0y = 1; -1.0+-Inf = 1 /
173	if (x == one && !issignaling (y))
174	return one;
175	if (x == -`1` && iy == `0x7fff0000`
176	&& (q.parts32.w1 \| q.parts32.w2 \| q.parts32.w3) == `0`)
177	return one;
178
179	/ +-NaN return x+y /
180	if ((ix > `0x7fff0000`)
181	\|\| ((ix == `0x7fff0000`)
182	&& ((p.parts32.w1 \| p.parts32.w2 \| p.parts32.w3) != `0`))
183	\|\| (iy > `0x7fff0000`)
184	\|\| ((iy == `0x7fff0000`)
185	&& ((q.parts32.w1 \| q.parts32.w2 \| q.parts32.w3) != `0`)))
186	return x + y;
187
188	/ determine if y is an odd int when x < 0*
189	* yisint = 0 ... y is not an integer
190	* yisint = 1 ... y is an odd int
191	* yisint = 2 ... y is an even int
192	*/
193	yisint = `0`;
194	if (hx < `0`)
195	{
196	if (iy >= `0x40700000`) / 2^113 /
197	yisint = `2`; / even integer y /
198	else if (iy >= `0x3fff0000`) / 1.0 /
199	{
200	if (__floorl (y) == y)
201	{
202	z = `0.5` * y;
203	if (__floorl (z) == z)
204	yisint = `2`;
205	else
206	yisint = `1`;
207	}
208	}
209	}
210
211	/ special value of y /
212	if ((q.parts32.w1 \| q.parts32.w2 \| q.parts32.w3) == `0`)
213	{
214	if (iy == `0x7fff0000`) / y is +-inf /
215	{
216	if (((ix - `0x3fff0000`) \| p.parts32.w1 \| p.parts32.w2 \| p.parts32.w3)
217	== `0`)
218	return y - y; / +-1*inf is NaN /*
219	else if (ix >= `0x3fff0000`) / (\|x\|>1)*+-inf = inf,0 /*
220	return (hy >= `0`) ? y : zero;
221	else / (\|x\|<1)*-,+inf = inf,0 /*
222	return (hy < `0`) ? -y : zero;
223	}
224	if (iy == `0x3fff0000`)
225	{ / y is +-1 /
226	if (hy < `0`)
227	return one / x;
228	else
229	return x;
230	}
231	if (hy == `0x40000000`)
232	return x * x; / y is 2 /
233	if (hy == `0x3ffe0000`)
234	{ / y is 0.5 /
235	if (hx >= `0`) / x >= +0 /
236	return __ieee754_sqrtl (x);
237	}
238	}
239
240	ax = fabsl (x);
241	/ special value of x /
242	if ((p.parts32.w1 \| p.parts32.w2 \| p.parts32.w3) == `0`)
243	{
244	if (ix == `0x7fff0000` \|\| ix == `0` \|\| ix == `0x3fff0000`)
245	{
246	z = ax; /x is +-0,+-inf,+-1 /
247	if (hy < `0`)
248	z = one / z; / z = (1/\|x\|) /
249	if (hx < `0`)
250	{
251	if (((ix - `0x3fff0000`) \| yisint) == `0`)
252	{
253	z = (z - z) / (z - z); / (-1)*non-int is NaN /*
254	}
255	else if (yisint == `1`)
256	z = -z; / (x<0)odd = -(\|x\|odd) /
257	}
258	return z;
259	}
260	}
261
262	/ (x<0)*(non-int) is NaN /*
263	if (((((u_int32_t) hx >> `31`) - `1`) \| yisint) == `0`)
264	return (x - x) / (x - x);
265
266	/ sgn (sign of result -ve*odd) = -1 else = 1 /*
267	sgn = one;
268	if (((((u_int32_t) hx >> `31`) - `1`) \| (yisint - `1`)) == `0`)
269	sgn = -one; / (-ve)*(odd int) /*
270
271	/ \|y\| is huge.*
272	2^-16495 = 1/2 of smallest representable value.
273	If (1 - 1/131072)^y underflows, y > 1.4986e9 /*
274	if (iy > `0x401d654b`)
275	{
276	/ if (1 - 2^-113)^y underflows, y > 1.1873e38 /
277	if (iy > `0x407d654b`)
278	{
279	if (ix <= `0x3ffeffff`)
280	return (hy < `0`) ? huge * huge : tiny * tiny;
281	if (ix >= `0x3fff0000`)
282	return (hy > `0`) ? huge * huge : tiny * tiny;
283	}
284	/ over/underflow if x is not close to one /
285	if (ix < `0x3ffeffff`)
286	return (hy < `0`) ? sgn * huge * huge : sgn * tiny * tiny;
287	if (ix > `0x3fff0000`)
288	return (hy > `0`) ? sgn * huge * huge : sgn * tiny * tiny;
289	}
290
291	ay = y > `0` ? y : -y;
292	if (ay < `0x1p-128`)
293	y = y < `0` ? -`0x1p-128` : `0x1p-128`;
294
295	n = `0`;
296	/ take care subnormal number /
297	if (ix < `0x00010000`)
298	{
299	ax *= two113;
300	n -= `113`;
301	o.value = ax;
302	ix = o.parts32.w0;
303	}
304	n += ((ix) >> `16`) - `0x3fff`;
305	j = ix & `0x0000ffff`;
306	/ determine interval /
307	ix = j \| `0x3fff0000`; / normalize ix /
308	if (j <= `0x3988`)
309	k = `0`; / \|x\|<sqrt(3/2) /
310	else if (j < `0xbb67`)
311	k = `1`; / \|x\|<sqrt(3) /
312	else
313	{
314	k = `0`;
315	n += `1`;
316	ix -= `0x00010000`;
317	}
318
319	o.value = ax;
320	o.parts32.w0 = ix;
321	ax = o.value;
322
323	/ compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) /
324	u = ax - bp[k]; / bp[0]=1.0, bp[1]=1.5 /
325	v = one / (ax + bp[k]);
326	s = u * v;
327	s_h = s;
328
329	o.value = s_h;
330	o.parts32.w3 = `0`;
331	o.parts32.w2 &= `0xf8000000`;
332	s_h = o.value;
333	/ t_h=ax+bp[k] High /
334	t_h = ax + bp[k];
335	o.value = t_h;
336	o.parts32.w3 = `0`;
337	o.parts32.w2 &= `0xf8000000`;
338	t_h = o.value;
339	t_l = ax - (t_h - bp[k]);
340	s_l = v * ((u - s_h * t_h) - s_h * t_l);
341	/ compute log(ax) /
342	s2 = s * s;
343	u = LN[`0`] + s2 * (LN[`1`] + s2 * (LN[`2`] + s2 * (LN[`3`] + s2 * LN[`4`])));
344	v = LD[`0`] + s2 * (LD[`1`] + s2 * (LD[`2`] + s2 * (LD[`3`] + s2 * (LD[`4`] + s2))));
345	r = s2 * s2 * u / v;
346	r += s_l * (s_h + s);
347	s2 = s_h * s_h;
348	t_h = `3.0` + s2 + r;
349	o.value = t_h;
350	o.parts32.w3 = `0`;
351	o.parts32.w2 &= `0xf8000000`;
352	t_h = o.value;
353	t_l = r - ((t_h - `3.0`) - s2);
354	/ u+v = s(1+...) /*
355	u = s_h * t_h;
356	v = s_l * t_h + t_l * s;
357	/ 2/(3log2)(s+...) /*
358	p_h = u + v;
359	o.value = p_h;
360	o.parts32.w3 = `0`;
361	o.parts32.w2 &= `0xf8000000`;
362	p_h = o.value;
363	p_l = v - (p_h - u);
364	z_h = cp_h * p_h; / cp_h+cp_l = 2/(3log2) /*
365	z_l = cp_l * p_h + p_l * cp + dp_l[k];
366	/ log2(ax) = (s+..)2/(3log2) = n + dp_h + z_h + z_l /
367	t = (_Float128) n;
368	t1 = (((z_h + z_l) + dp_h[k]) + t);
369	o.value = t1;
370	o.parts32.w3 = `0`;
371	o.parts32.w2 &= `0xf8000000`;
372	t1 = o.value;
373	t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
374
375	/ split up y into y1+y2 and compute (y1+y2)(t1+t2) /*
376	y1 = y;
377	o.value = y1;
378	o.parts32.w3 = `0`;
379	o.parts32.w2 &= `0xf8000000`;
380	y1 = o.value;
381	p_l = (y - y1) * t1 + y * t2;
382	p_h = y1 * t1;
383	z = p_l + p_h;
384	o.value = z;
385	j = o.parts32.w0;
386	if (j >= `0x400d0000`) / z >= 16384 /
387	{
388	/ if z > 16384 /
389	if (((j - `0x400d0000`) \| o.parts32.w1 \| o.parts32.w2 \| o.parts32.w3) != `0`)
390	return sgn * huge * huge; / overflow /
391	else
392	{
393	if (p_l + ovt > z - p_h)
394	return sgn * huge * huge; / overflow /
395	}
396	}
397	else if ((j & `0x7fffffff`) >= `0x400d01b9`) / z <= -16495 /
398	{
399	/ z < -16495 /
400	if (((j - `0xc00d01bc`) \| o.parts32.w1 \| o.parts32.w2 \| o.parts32.w3)
401	!= `0`)
402	return sgn * tiny * tiny; / underflow /
403	else
404	{
405	if (p_l <= z - p_h)
406	return sgn * tiny * tiny; / underflow /
407	}
408	}
409	/ compute 2*(p_h+p_l) /*
410	i = j & `0x7fffffff`;
411	k = (i >> `16`) - `0x3fff`;
412	n = `0`;
413	if (i > `0x3ffe0000`)
414	{ / if \|z\| > 0.5, set n = [z+0.5] /
415	n = __floorl (z + L(`0.5`));
416	t = n;
417	p_h -= t;
418	}
419	t = p_l + p_h;
420	o.value = t;
421	o.parts32.w3 = `0`;
422	o.parts32.w2 &= `0xf8000000`;
423	t = o.value;
424	u = t * lg2_h;
425	v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
426	z = u + v;
427	w = v - (z - u);
428	/ exp(z) /
429	t = z * z;
430	u = PN[`0`] + t * (PN[`1`] + t * (PN[`2`] + t * (PN[`3`] + t * PN[`4`])));
431	v = PD[`0`] + t * (PD[`1`] + t * (PD[`2`] + t * (PD[`3`] + t)));
432	t1 = z - t * u / v;
433	r = (z * t1) / (t1 - two) - (w + z * w);
434	z = one - (r - z);
435	o.value = z;
436	j = o.parts32.w0;
437	j += (n << `16`);
438	if ((j >> `16`) <= `0`)
439	{
440	z = __scalbnl (z, n); / subnormal output /
441	_Float128 force_underflow = z * z;
442	math_force_eval (force_underflow);
443	}
444	else
445	{
446	o.parts32.w0 = j;
447	z = o.value;
448	}
449	return sgn * z;
450	}
451	strong_alias (__ieee754_powl, __powl_finite)
452

Browse the source code of glibc/sysdeps/ieee754/ldbl-128/e_powl.c