k_rem_pio2.c source code [glibc/sysdeps/ieee754/dbl-64/k_rem_pio2.c]

1	/ @(#)k_rem_pio2.c 5.1 93/09/24 /
2	/*
3	* ====================================================
4	* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5	*
6	* Developed at SunPro, a Sun Microsystems, Inc. business.
7	* Permission to use, copy, modify, and distribute this
8	* software is freely granted, provided that this notice
9	* is preserved.
10	* ====================================================
11	*/
12
13	#if defined(LIBM_SCCS) && !defined(lint)
14	static char rcsid[] = "$NetBSD: k_rem_pio2.c,v 1.7 1995/05/10 20:46:25 jtc Exp $";
15	#endif
16
17	/*
18	* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
19	* double x[],y[]; int e0,nx,prec; int ipio2[];
20	*
21	* __kernel_rem_pio2 return the last three digits of N with
22	* y = x - N*pi/2
23	* so that \|y\| < pi/2.
24	*
25	* The method is to compute the integer (mod 8) and fraction parts of
26	* (2/pi)*x without doing the full multiplication. In general we
27	* skip the part of the product that are known to be a huge integer (
28	* more accurately, = 0 mod 8 ). Thus the number of operations are
29	* independent of the exponent of the input.
30	*
31	* (2/pi) is represented by an array of 24-bit integers in ipio2[].
32	*
33	* Input parameters:
34	* x[] The input value (must be positive) is broken into nx
35	* pieces of 24-bit integers in double precision format.
36	* x[i] will be the i-th 24 bit of x. The scaled exponent
37	* of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
38	* match x's up to 24 bits.
39	*
40	* Example of breaking a double positive z into x[0]+x[1]+x[2]:
41	* e0 = ilogb(z)-23
42	* z = scalbn(z,-e0)
43	* for i = 0,1,2
44	* x[i] = floor(z)
45	* z = (z-x[i])2*24
46	*
47	*
48	* y[] ouput result in an array of double precision numbers.
49	* The dimension of y[] is:
50	* 24-bit precision 1
51	* 53-bit precision 2
52	* 64-bit precision 2
53	* 113-bit precision 3
54	* The actual value is the sum of them. Thus for 113-bit
55	* precision, one may have to do something like:
56	*
57	* long double t,w,r_head, r_tail;
58	* t = (long double)y[2] + (long double)y[1];
59	* w = (long double)y[0];
60	* r_head = t+w;
61	* r_tail = w - (r_head - t);
62	*
63	* e0 The exponent of x[0]
64	*
65	* nx dimension of x[]
66	*
67	* prec an integer indicating the precision:
68	* 0 24 bits (single)
69	* 1 53 bits (double)
70	* 2 64 bits (extended)
71	* 3 113 bits (quad)
72	*
73	* ipio2[]
74	* integer array, contains the (24i)-th to (24i+23)-th
75	* bit of 2/pi after binary point. The corresponding
76	* floating value is
77	*
78	* ipio2[i] * 2^(-24(i+1)).
79	*
80	* External function:
81	* double scalbn(), floor();
82	*
83	*
84	* Here is the description of some local variables:
85	*
86	* jk jk+1 is the initial number of terms of ipio2[] needed
87	* in the computation. The recommended value is 2,3,4,
88	* 6 for single, double, extended,and quad.
89	*
90	* jz local integer variable indicating the number of
91	* terms of ipio2[] used.
92	*
93	* jx nx - 1
94	*
95	* jv index for pointing to the suitable ipio2[] for the
96	* computation. In general, we want
97	* ( 2^e0x[0] ipio2[jv-1]*2^(-24jv) )/8
98	* is an integer. Thus
99	* e0-3-24*jv >= 0 or (e0-3)/24 >= jv
100	* Hence jv = max(0,(e0-3)/24).
101	*
102	* jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
103	*
104	* q[] double array with integral value, representing the
105	* 24-bits chunk of the product of x and 2/pi.
106	*
107	* q0 the corresponding exponent of q[0]. Note that the
108	* exponent for q[i] would be q0-24*i.
109	*
110	* PIo2[] double precision array, obtained by cutting pi/2
111	* into 24 bits chunks.
112	*
113	* f[] ipio2[] in floating point
114	*
115	* iq[] integer array by breaking up q[] in 24-bits chunk.
116	*
117	* fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
118	*
119	* ih integer. If >0 it indicates q[] is >= 0.5, hence
120	* it also indicates the sign of the result.
121	*
122	*/
123
124
125	/*
126	* Constants:
127	* The hexadecimal values are the intended ones for the following
128	* constants. The decimal values may be used, provided that the
129	* compiler will convert from decimal to binary accurately enough
130	* to produce the hexadecimal values shown.
131	*/
132
133	#include <math.h>
134	#include <math-narrow-eval.h>
135	#include <math_private.h>
136	#include <libc-diag.h>
137
138	static const int init_jk[] = {`2`,`3`,`4`,`6`}; / initial value for jk /
139
140	static const double PIo2[] = {
141	`1.57079625129699707031e+00`, / 0x3FF921FB, 0x40000000 /
142	`7.54978941586159635335e-08`, / 0x3E74442D, 0x00000000 /
143	`5.39030252995776476554e-15`, / 0x3CF84698, 0x80000000 /
144	`3.28200341580791294123e-22`, / 0x3B78CC51, 0x60000000 /
145	`1.27065575308067607349e-29`, / 0x39F01B83, 0x80000000 /
146	`1.22933308981111328932e-36`, / 0x387A2520, 0x40000000 /
147	`2.73370053816464559624e-44`, / 0x36E38222, 0x80000000 /
148	`2.16741683877804819444e-51`, / 0x3569F31D, 0x00000000 /
149	};
150
151	static const double
152	zero = `0.0`,
153	one = `1.0`,
154	two24 = `1.67772160000000000000e+07`, / 0x41700000, 0x00000000 /
155	twon24 = `5.96046447753906250000e-08`; / 0x3E700000, 0x00000000 /
156
157	int
158	__kernel_rem_pio2 (double x, double* y, int* e0, int nx, int prec,
159	const int32_t *ipio2)
160	{
161	int32_t jz, jx, jv, jp, jk, carry, n, iq[`20`], i, j, k, m, q0, ih;
162	double z, fw, f[`20`], fq[`20`], q[`20`];
163
164	/ initialize jk/
165	jk = init_jk[prec];
166	jp = jk;
167
168	/ determine jx,jv,q0, note that 3>q0 /
169	jx = nx - `1`;
170	jv = (e0 - `3`) / `24`; if (jv < `0`)
171	jv = `0`;
172	q0 = e0 - `24` * (jv + `1`);
173
174	/ set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] /
175	j = jv - jx; m = jx + jk;
176	for (i = `0`; i <= m; i++, j++)
177	f[i] = (j < `0`) ? zero : (double) ipio2[j];
178
179	/ compute q[0],q[1],...q[jk] /
180	for (i = `0`; i <= jk; i++)
181	{
182	for (j = `0`, fw = `0.0`; j <= jx; j++)
183	fw += x[j] * f[jx + i - j];
184	q[i] = fw;
185	}
186
187	jz = jk;
188	recompute:
189	/ distill q[] into iq[] reversingly /
190	for (i = `0`, j = jz, z = q[jz]; j > `0`; i++, j--)
191	{
192	fw = (double) ((int32_t) (twon24 * z));
193	iq[i] = (int32_t) (z - two24 * fw);
194	z = q[j - `1`] + fw;
195	}
196
197	/ compute n /
198	z = __scalbn (z, q0); / actual value of z /
199	z -= `8.0` * __floor (z * `0.125`); / trim off integer >= 8 /
200	n = (int32_t) z;
201	z -= (double) n;
202	ih = `0`;
203	if (q0 > `0`) / need iq[jz-1] to determine n /
204	{
205	i = (iq[jz - `1`] >> (`24` - q0)); n += i;
206	iq[jz - `1`] -= i << (`24` - q0);
207	ih = iq[jz - `1`] >> (`23` - q0);
208	}
209	else if (q0 == `0`)
210	ih = iq[jz - `1`] >> `23`;
211	else if (z >= `0.5`)
212	ih = `2`;
213
214	if (ih > `0`) / q > 0.5 /
215	{
216	n += `1`; carry = `0`;
217	for (i = `0`; i < jz; i++) / compute 1-q /
218	{
219	j = iq[i];
220	if (carry == `0`)
221	{
222	if (j != `0`)
223	{
224	carry = `1`; iq[i] = `0x1000000` - j;
225	}
226	}
227	else
228	iq[i] = `0xffffff` - j;
229	}
230	if (q0 > `0`) / rare case: chance is 1 in 12 /
231	{
232	switch (q0)
233	{
234	case `1`:
235	iq[jz - `1`] &= `0x7fffff`; break;
236	case `2`:
237	iq[jz - `1`] &= `0x3fffff`; break;
238	}
239	}
240	if (ih == `2`)
241	{
242	z = one - z;
243	if (carry != `0`)
244	z -= __scalbn (one, q0);
245	}
246	}
247
248	/ check if recomputation is needed /
249	if (z == zero)
250	{
251	j = `0`;
252	for (i = jz - `1`; i >= jk; i--)
253	j \|= iq[i];
254	if (j == `0`) / need recomputation /
255	{
256	/ On s390x gcc 6.1 -O3 produces the warning "array subscript is below*
257	array bounds [-Werror=array-bounds]". Only __ieee754_rem_pio2l
258	calls __kernel_rem_pio2 for normal numbers and \|x\| > pi/4 in case
259	of ldbl-96 and \|x\| > 3pi/4 in case of ldbl-128[ibm].
260	Thus x can't be zero and ipio2 is not zero, too. Thus not all iq[]
261	values can't be zero. /*
262	DIAG_PUSH_NEEDS_COMMENT;
263	DIAG_IGNORE_NEEDS_COMMENT (`6.1`, "-Warray-bounds");
264	for (k = `1`; iq[jk - k] == `0`; k++)
265	; / k = no. of terms needed /
266	DIAG_POP_NEEDS_COMMENT;
267
268	for (i = jz + `1`; i <= jz + k; i++) / add q[jz+1] to q[jz+k] /
269	{
270	f[jx + i] = (double) ipio2[jv + i];
271	for (j = `0`, fw = `0.0`; j <= jx; j++)
272	fw += x[j] * f[jx + i - j];
273	q[i] = fw;
274	}
275	jz += k;
276	goto recompute;
277	}
278	}
279
280	/ chop off zero terms /
281	if (z == `0.0`)
282	{
283	jz -= `1`; q0 -= `24`;
284	while (iq[jz] == `0`)
285	{
286	jz--; q0 -= `24`;
287	}
288	}
289	else / break z into 24-bit if necessary /
290	{
291	z = __scalbn (z, -q0);
292	if (z >= two24)
293	{
294	fw = (double) ((int32_t) (twon24 * z));
295	iq[jz] = (int32_t) (z - two24 * fw);
296	jz += `1`; q0 += `24`;
297	iq[jz] = (int32_t) fw;
298	}
299	else
300	iq[jz] = (int32_t) z;
301	}
302
303	/ convert integer "bit" chunk to floating-point value /
304	fw = __scalbn (one, q0);
305	for (i = jz; i >= `0`; i--)
306	{
307	q[i] = fw * (double) iq[i]; fw *= twon24;
308	}
309
310	/ compute PIo2[0,...,jp]q[jz,...,0] /*
311	for (i = jz; i >= `0`; i--)
312	{
313	for (fw = `0.0`, k = `0`; k <= jp && k <= jz - i; k++)
314	fw += PIo2[k] * q[i + k];
315	fq[jz - i] = fw;
316	}
317
318	/ compress fq[] into y[] /
319	switch (prec)
320	{
321	case `0`:
322	fw = `0.0`;
323	for (i = jz; i >= `0`; i--)
324	fw += fq[i];
325	y[`0`] = (ih == `0`) ? fw : -fw;
326	break;
327	case `1`:
328	case `2`:;
329	double fv = `0.0`;
330	for (i = jz; i >= `0`; i--)
331	fv = math_narrow_eval (fv + fq[i]);
332	y[`0`] = (ih == `0`) ? fv : -fv;
333	/ GCC mainline (to be GCC 9), as of 2018-05-22 on i686, warns*
334	that fq[0] may be used uninitialized. This is not possible
335	because jz is always nonnegative when the above loop
336	initializing fq is executed, because the result is never zero
337	to full precision (this function is not called for zero
338	arguments). /*
339	DIAG_PUSH_NEEDS_COMMENT;
340	DIAG_IGNORE_NEEDS_COMMENT (`9`, "-Wmaybe-uninitialized");
341	fv = math_narrow_eval (fq[`0`] - fv);
342	DIAG_POP_NEEDS_COMMENT;
343	for (i = `1`; i <= jz; i++)
344	fv = math_narrow_eval (fv + fq[i]);
345	y[`1`] = (ih == `0`) ? fv : -fv;
346	break;
347	case `3`: / painful /
348	for (i = jz; i > `0`; i--)
349	{
350	double fv = math_narrow_eval (fq[i - `1`] + fq[i]);
351	fq[i] += fq[i - `1`] - fv;
352	fq[i - `1`] = fv;
353	}
354	for (i = jz; i > `1`; i--)
355	{
356	double fv = math_narrow_eval (fq[i - `1`] + fq[i]);
357	fq[i] += fq[i - `1`] - fv;
358	fq[i - `1`] = fv;
359	}
360	for (fw = `0.0`, i = jz; i >= `2`; i--)
361	fw += fq[i];
362	if (ih == `0`)
363	{
364	y[`0`] = fq[`0`]; y[`1`] = fq[`1`]; y[`2`] = fw;
365	}
366	else
367	{
368	y[`0`] = -fq[`0`]; y[`1`] = -fq[`1`]; y[`2`] = -fw;
369	}
370	}
371	return n & `7`;
372	}
373

Browse the source code of glibc/sysdeps/ieee754/dbl-64/k_rem_pio2.c