1/* k_rem_pio2f.c -- float version of k_rem_pio2.c
2 * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
3 */
4
5/*
6 * ====================================================
7 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
8 *
9 * Developed at SunPro, a Sun Microsystems, Inc. business.
10 * Permission to use, copy, modify, and distribute this
11 * software is freely granted, provided that this notice
12 * is preserved.
13 * ====================================================
14 */
15
16#if defined(LIBM_SCCS) && !defined(lint)
17static char rcsid[] = "$NetBSD: k_rem_pio2f.c,v 1.4 1995/05/10 20:46:28 jtc Exp $";
18#endif
19
20#include <math.h>
21#include <math-narrow-eval.h>
22#include <math_private.h>
23#include <libc-diag.h>
24
25/* In the float version, the input parameter x contains 8 bit
26 integers, not 24 bit integers. 113 bit precision is not supported. */
27
28static const int init_jk[] = {4,7,9}; /* initial value for jk */
29
30static const float PIo2[] = {
31 1.5703125000e+00, /* 0x3fc90000 */
32 4.5776367188e-04, /* 0x39f00000 */
33 2.5987625122e-05, /* 0x37da0000 */
34 7.5437128544e-08, /* 0x33a20000 */
35 6.0026650317e-11, /* 0x2e840000 */
36 7.3896444519e-13, /* 0x2b500000 */
37 5.3845816694e-15, /* 0x27c20000 */
38 5.6378512969e-18, /* 0x22d00000 */
39 8.3009228831e-20, /* 0x1fc40000 */
40 3.2756352257e-22, /* 0x1bc60000 */
41 6.3331015649e-25, /* 0x17440000 */
42};
43
44static const float
45zero = 0.0,
46one = 1.0,
47two8 = 2.5600000000e+02, /* 0x43800000 */
48twon8 = 3.9062500000e-03; /* 0x3b800000 */
49
50int __kernel_rem_pio2f(float *x, float *y, int e0, int nx, int prec, const int32_t *ipio2)
51{
52 int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
53 float z,fw,f[20],fq[20],q[20];
54
55 /* initialize jk*/
56 jk = init_jk[prec];
57 jp = jk;
58
59 /* determine jx,jv,q0, note that 3>q0 */
60 jx = nx-1;
61 jv = (e0-3)/8; if(jv<0) jv=0;
62 q0 = e0-8*(jv+1);
63
64 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
65 j = jv-jx; m = jx+jk;
66 for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (float) ipio2[j];
67
68 /* compute q[0],q[1],...q[jk] */
69 for (i=0;i<=jk;i++) {
70 for(j=0,fw=0.0;j<=jx;j++)
71 fw += x[j]*f[jx+i-j];
72 q[i] = fw;
73 }
74
75 jz = jk;
76recompute:
77 /* distill q[] into iq[] reversingly */
78 for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
79 fw = (float)((int32_t)(twon8* z));
80 iq[i] = (int32_t)(z-two8*fw);
81 z = q[j-1]+fw;
82 }
83
84 /* compute n */
85 z = __scalbnf(z,q0); /* actual value of z */
86 z -= (float)8.0*__floorf(z*(float)0.125); /* trim off integer >= 8 */
87 n = (int32_t) z;
88 z -= (float)n;
89 ih = 0;
90 if(q0>0) { /* need iq[jz-1] to determine n */
91 i = (iq[jz-1]>>(8-q0)); n += i;
92 iq[jz-1] -= i<<(8-q0);
93 ih = iq[jz-1]>>(7-q0);
94 }
95 else if(q0==0) ih = iq[jz-1]>>7;
96 else if(z>=(float)0.5) ih=2;
97
98 if(ih>0) { /* q > 0.5 */
99 n += 1; carry = 0;
100 for(i=0;i<jz ;i++) { /* compute 1-q */
101 j = iq[i];
102 if(carry==0) {
103 if(j!=0) {
104 carry = 1; iq[i] = 0x100- j;
105 }
106 } else iq[i] = 0xff - j;
107 }
108 if(q0>0) { /* rare case: chance is 1 in 12 */
109 switch(q0) {
110 case 1:
111 iq[jz-1] &= 0x7f; break;
112 case 2:
113 iq[jz-1] &= 0x3f; break;
114 }
115 }
116 if(ih==2) {
117 z = one - z;
118 if(carry!=0) z -= __scalbnf(one,q0);
119 }
120 }
121
122 /* check if recomputation is needed */
123 if(z==zero) {
124 j = 0;
125 for (i=jz-1;i>=jk;i--) j |= iq[i];
126 if(j==0) { /* need recomputation */
127 /* On s390x gcc 6.1 -O3 produces the warning "array subscript is
128 below array bounds [-Werror=array-bounds]". Only
129 __ieee754_rem_pio2f calls __kernel_rem_pio2f for normal
130 numbers and |x| ~> 2^7*(pi/2). Thus x can't be zero and
131 ipio2 is not zero, too. Thus not all iq[] values can't be
132 zero. */
133 DIAG_PUSH_NEEDS_COMMENT;
134 DIAG_IGNORE_NEEDS_COMMENT (6.1, "-Warray-bounds");
135 for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
136 DIAG_POP_NEEDS_COMMENT;
137
138 for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
139 f[jx+i] = (float) ipio2[jv+i];
140 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
141 q[i] = fw;
142 }
143 jz += k;
144 goto recompute;
145 }
146 }
147
148 /* chop off zero terms */
149 if(z==(float)0.0) {
150 jz -= 1; q0 -= 8;
151 while(iq[jz]==0) { jz--; q0-=8;}
152 } else { /* break z into 8-bit if necessary */
153 z = __scalbnf(z,-q0);
154 if(z>=two8) {
155 fw = (float)((int32_t)(twon8*z));
156 iq[jz] = (int32_t)(z-two8*fw);
157 jz += 1; q0 += 8;
158 iq[jz] = (int32_t) fw;
159 } else iq[jz] = (int32_t) z ;
160 }
161
162 /* convert integer "bit" chunk to floating-point value */
163 fw = __scalbnf(one,q0);
164 for(i=jz;i>=0;i--) {
165 q[i] = fw*(float)iq[i]; fw*=twon8;
166 }
167
168 /* compute PIo2[0,...,jp]*q[jz,...,0] */
169 for(i=jz;i>=0;i--) {
170 for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
171 fq[jz-i] = fw;
172 }
173
174 /* compress fq[] into y[] */
175 switch(prec) {
176 case 0:
177 fw = 0.0;
178 for (i=jz;i>=0;i--) fw += fq[i];
179 y[0] = (ih==0)? fw: -fw;
180 break;
181 case 1:
182 case 2:;
183 float fv = 0.0;
184 for (i=jz;i>=0;i--) fv = math_narrow_eval (fv + fq[i]);
185 y[0] = (ih==0)? fv: -fv;
186 /* GCC mainline (to be GCC 9), as of 2018-05-22 on
187 i686, warns that fq[0] may be used uninitialized.
188 This is not possible because jz is always
189 nonnegative when the above loop initializing fq is
190 executed, because the result is never zero to full
191 precision (this function is not called for zero
192 arguments). */
193 DIAG_PUSH_NEEDS_COMMENT;
194 DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
195 fv = math_narrow_eval (fq[0]-fv);
196 DIAG_POP_NEEDS_COMMENT;
197 for (i=1;i<=jz;i++) fv = math_narrow_eval (fv + fq[i]);
198 y[1] = (ih==0)? fv: -fv;
199 break;
200 case 3: /* painful */
201 for (i=jz;i>0;i--) {
202 float fv = math_narrow_eval (fq[i-1]+fq[i]);
203 fq[i] += fq[i-1]-fv;
204 fq[i-1] = fv;
205 }
206 for (i=jz;i>1;i--) {
207 float fv = math_narrow_eval (fq[i-1]+fq[i]);
208 fq[i] += fq[i-1]-fv;
209 fq[i-1] = fv;
210 }
211 for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
212 if(ih==0) {
213 y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
214 } else {
215 y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
216 }
217 }
218 return n&7;
219}
220