1 | /* Software floating-point emulation. |
2 | Basic one-word fraction declaration and manipulation. |
3 | Copyright (C) 1997-2021 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | Contributed by Richard Henderson (rth@cygnus.com), |
6 | Jakub Jelinek (jj@ultra.linux.cz), |
7 | David S. Miller (davem@redhat.com) and |
8 | Peter Maydell (pmaydell@chiark.greenend.org.uk). |
9 | |
10 | The GNU C Library is free software; you can redistribute it and/or |
11 | modify it under the terms of the GNU Lesser General Public |
12 | License as published by the Free Software Foundation; either |
13 | version 2.1 of the License, or (at your option) any later version. |
14 | |
15 | In addition to the permissions in the GNU Lesser General Public |
16 | License, the Free Software Foundation gives you unlimited |
17 | permission to link the compiled version of this file into |
18 | combinations with other programs, and to distribute those |
19 | combinations without any restriction coming from the use of this |
20 | file. (The Lesser General Public License restrictions do apply in |
21 | other respects; for example, they cover modification of the file, |
22 | and distribution when not linked into a combine executable.) |
23 | |
24 | The GNU C Library is distributed in the hope that it will be useful, |
25 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
26 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
27 | Lesser General Public License for more details. |
28 | |
29 | You should have received a copy of the GNU Lesser General Public |
30 | License along with the GNU C Library; if not, see |
31 | <https://www.gnu.org/licenses/>. */ |
32 | |
33 | #ifndef SOFT_FP_OP_1_H |
34 | #define SOFT_FP_OP_1_H 1 |
35 | |
/* A one-word fraction named X is held in the single variable X_f of
   type _FP_W_TYPE.  */

/* Declare the fraction word of X; whatever _FP_ZERO_INIT expands to
   follows the declarator.  */
#define _FP_FRAC_DECL_1(X)      _FP_W_TYPE X##_f _FP_ZERO_INIT

/* Copy the fraction of S into D.  */
#define _FP_FRAC_COPY_1(D, S)   (D##_f = S##_f)

/* Assign the value I to the fraction of X.  */
#define _FP_FRAC_SET_1(X, I)    (X##_f = (I))

/* With a single word, the high word, the low word and word number W
   all name the same object; the index W is ignored.  Each of these
   expands to the (parenthesized) variable itself.  */
#define _FP_FRAC_HIGH_1(X)      (X##_f)
#define _FP_FRAC_LOW_1(X)       (X##_f)
#define _FP_FRAC_WORD_1(X, w)   (X##_f)
42 | |
/* Add the integer I to the fraction of X.  */
#define _FP_FRAC_ADDI_1(X, I)   (X##_f += (I))

/* Shift the fraction of X left by N bits.  When N is the compile-time
   constant 1 the shift is written as an addition of the word to
   itself; every other count uses a plain left shift.  */
#define _FP_FRAC_SLL_1(X, N)                            \
  do                                                    \
    {                                                   \
      if (!__builtin_constant_p (N) || (N) != 1)        \
        X##_f <<= (N);                                  \
      else                                              \
        X##_f += X##_f;                                 \
    }                                                   \
  while (0)

/* Shift the fraction of X right by N bits; shifted-out bits are
   discarded.  */
#define _FP_FRAC_SRL_1(X, N)    (X##_f >>= (N))
54 | |
/* Right shifts that remember whether any set bit was shifted out
   (the "sticky" bit).

   _FP_FRAC_SRST_1 shifts the fraction of X right by N and stores the
   sticky indication in S.  _FP_FRAC_SRS_1 shifts the fraction of X
   right by N and ORs the sticky indication into bit 0 of the result.

   Unless N is the compile-time constant 1, the sticky test shifts the
   word left by (_FP_W_TYPE_SIZE - N), so N must be greater than zero
   and smaller than _FP_W_TYPE_SIZE.  The SZ argument is not used by
   the one-word versions.  */
#define _FP_FRAC_SRST_1(X, S, N, sz)    __FP_FRAC_SRST_1 (X##_f, S, (N), (sz))
#define _FP_FRAC_SRS_1(X, N, sz)        __FP_FRAC_SRS_1 (X##_f, (N), (sz))

/* Worker for _FP_FRAC_SRST_1; X is the fraction word itself.  */
#define __FP_FRAC_SRST_1(X, S, N, sz)                   \
  do                                                    \
    {                                                   \
      /* Record the bits about to be lost...  */        \
      if (__builtin_constant_p (N) && (N) == 1)         \
        S = X & 1;                                      \
      else                                              \
        S = (X << (_FP_W_TYPE_SIZE - (N))) != 0;        \
      /* ...then drop them.  */                         \
      X = X >> (N);                                     \
    }                                                   \
  while (0)

/* Worker for _FP_FRAC_SRS_1; X is the fraction word itself.  This is
   an expression whose value is the new contents of X.  */
#define __FP_FRAC_SRS_1(X, N, sz)                               \
  (X = ((X >> (N))                                              \
        | ((__builtin_constant_p (N) && (N) == 1)               \
           ? (X & 1)                                            \
           : ((X << (_FP_W_TYPE_SIZE - (N))) != 0))))
73 | |
/* R = X + Y on the fraction words.  */
#define _FP_FRAC_ADD_1(R, X, Y) (R##_f = X##_f + Y##_f)

/* R = X - Y on the fraction words.  */
#define _FP_FRAC_SUB_1(R, X, Y) (R##_f = X##_f - Y##_f)

/* Subtract the fraction of Y from the fraction of X in place.  */
#define _FP_FRAC_DEC_1(X, Y)    (X##_f = X##_f - Y##_f)

/* Hand the fraction word of X to __FP_CLZ, with Z as its first
   argument.  */
#define _FP_FRAC_CLZ_1(z, X)    __FP_CLZ ((z), X##_f)
78 | |
/* Predicates on one-word fractions.  */

/* Nonzero if the fraction word, reinterpreted as the signed type
   _FP_WS_TYPE, is negative.  */
#define _FP_FRAC_NEGP_1(X)              (((_FP_WS_TYPE) X##_f) < 0)

/* Nonzero if the fraction is exactly zero.  */
#define _FP_FRAC_ZEROP_1(X)             (X##_f == 0)

/* The bits of the fraction selected by _FP_OVERFLOW_<fs>; nonzero
   when any of them is set.  */
#define _FP_FRAC_OVERP_1(fs, X)         (X##_f & (_FP_OVERFLOW_##fs))

/* Clear the bits selected by _FP_OVERFLOW_<fs> in the fraction.  */
#define _FP_FRAC_CLEAR_OVERP_1(fs, X)   (X##_f &= ~_FP_OVERFLOW_##fs)

/* The bits of the fraction selected by _FP_HIGHBIT_DW_<fs>.  */
#define _FP_FRAC_HIGHBIT_DW_1(fs, X)    (X##_f & (_FP_HIGHBIT_DW_##fs))

/* Comparisons of the fraction words of X and Y.  */
#define _FP_FRAC_EQ_1(X, Y)             (X##_f == Y##_f)
#define _FP_FRAC_GE_1(X, Y)             (X##_f >= Y##_f)
#define _FP_FRAC_GT_1(X, Y)             (X##_f > Y##_f)
/* Distinguished one-word fraction values: zero, one, and the
   complement of a zero of the signed type _FP_WS_TYPE (all bits
   set).  */
#define _FP_ZEROFRAC_1  0
#define _FP_MINFRAC_1   1
#define _FP_MAXFRAC_1   (~(_FP_WS_TYPE) 0)
92 | |
/* Split the native floating-point value VAL of format FS into its raw
   fields.  VAL is stored in the FLT member of a temporary
   union _FP_UNION_<fs>, and the SIGN, EXP and FRAC members of its
   BITS view are copied to X_s, X_e and X_f.  The data is neither
   classified nor normalized.  */

#define _FP_UNPACK_RAW_1(fs, X, val)                    \
  do                                                    \
    {                                                   \
      union _FP_UNION_##fs _FP_UNPACK_RAW_1_u;          \
                                                        \
      _FP_UNPACK_RAW_1_u.flt = (val);                   \
      X##_s = _FP_UNPACK_RAW_1_u.bits.sign;             \
      X##_e = _FP_UNPACK_RAW_1_u.bits.exp;              \
      X##_f = _FP_UNPACK_RAW_1_u.bits.frac;             \
    }                                                   \
  while (0)
107 | |
/* Like _FP_UNPACK_RAW_1, but VAL is a pointer: it is cast to
   union _FP_UNION_<fs> * and the SIGN, EXP and FRAC members of the
   pointed-to BITS view are copied to X_s, X_e and X_f.  */
#define _FP_UNPACK_RAW_1_P(fs, X, val)                          \
  do                                                            \
    {                                                           \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_1_P_up;              \
                                                                \
      _FP_UNPACK_RAW_1_P_up = (union _FP_UNION_##fs *) (val);   \
      X##_s = _FP_UNPACK_RAW_1_P_up->bits.sign;                 \
      X##_e = _FP_UNPACK_RAW_1_P_up->bits.exp;                  \
      X##_f = _FP_UNPACK_RAW_1_P_up->bits.frac;                 \
    }                                                           \
  while (0)
119 | |
/* Reassemble a native floating-point value of format FS from raw
   fields.  X_s, X_e and X_f are stored in the SIGN, EXP and FRAC
   members of the BITS view of a temporary union _FP_UNION_<fs>, whose
   FLT member is then assigned to VAL.  */

#define _FP_PACK_RAW_1(fs, val, X)                      \
  do                                                    \
    {                                                   \
      union _FP_UNION_##fs _FP_PACK_RAW_1_u;            \
                                                        \
      _FP_PACK_RAW_1_u.bits.sign = X##_s;               \
      _FP_PACK_RAW_1_u.bits.exp = X##_e;                \
      _FP_PACK_RAW_1_u.bits.frac = X##_f;               \
                                                        \
      (val) = _FP_PACK_RAW_1_u.flt;                     \
    }                                                   \
  while (0)
134 | |
/* Like _FP_PACK_RAW_1, but VAL is a pointer: it is cast to
   union _FP_UNION_<fs> * and X_s, X_e and X_f are written to the
   SIGN, EXP and FRAC members of the pointed-to BITS view.  */
#define _FP_PACK_RAW_1_P(fs, val, X)                            \
  do                                                            \
    {                                                           \
      union _FP_UNION_##fs *_FP_PACK_RAW_1_P_up;                \
                                                                \
      _FP_PACK_RAW_1_P_up = (union _FP_UNION_##fs *) (val);     \
      _FP_PACK_RAW_1_P_up->bits.sign = X##_s;                   \
      _FP_PACK_RAW_1_P_up->bits.exp = X##_e;                    \
      _FP_PACK_RAW_1_P_up->bits.frac = X##_f;                   \
    }                                                           \
  while (0)
146 | |
147 | |
148 | /* Multiplication algorithms: */ |
149 | |
/* Basic.  When the host word is at least 2*FRACBITS wide, the whole
   product fits in one word and can be formed with a single multiply.
   The double-width product of the fractions of X and Y is stored in
   the fraction of R; WFRACBITS is not used.  */

#define _FP_MUL_MEAT_DW_1_imm(wfracbits, R, X, Y)       \
  do                                                    \
    {                                                   \
      R##_f = (X##_f) * (Y##_f);                        \
    }                                                   \
  while (0)
159 | |
/* Multiply the fractions of X and Y into R using
   _FP_MUL_MEAT_DW_1_imm, then shift the product right by
   WFRACBITS - 1 with a sticky low bit.  */
#define _FP_MUL_MEAT_1_imm(wfracbits, R, X, Y)                          \
  do                                                                    \
    {                                                                   \
      /* Form the full product in R.  */                                \
      _FP_MUL_MEAT_DW_1_imm ((wfracbits), R, X, Y);                     \
      /* Normalize.  Since we know where the msb of the multiplicands   \
         was (bit B), we know that the msb of the product is at         \
         either 2B or 2B-1.  */                                         \
      _FP_FRAC_SRS_1 (R, (wfracbits) - 1, 2 * (wfracbits));             \
    }                                                                   \
  while (0)
170 | |
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
   DOIT is invoked as DOIT (high, low, x, y) with the two result words
   R_f1 and R_f0 and the fraction words of X and Y; WFRACBITS is not
   used.  */

#define _FP_MUL_MEAT_DW_1_wide(wfracbits, R, X, Y, doit)        \
  do                                                            \
    {                                                           \
      doit (R##_f1, R##_f0, X##_f, Y##_f);                      \
    }                                                           \
  while (0)
179 | |
/* Multiply the fractions of X and Y with the 1W * 1W => 2W primitive
   DOIT: the two-word product is built in a temporary, shifted right
   by WFRACBITS - 1 with a sticky low bit, and its low word becomes
   the fraction of R.  */
#define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit)                   \
  do                                                                    \
    {                                                                   \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_wide_prod);                       \
                                                                        \
      _FP_MUL_MEAT_DW_1_wide ((wfracbits), _FP_MUL_MEAT_1_wide_prod,    \
                              X, Y, doit);                              \
      /* Normalize.  Since we know where the msb of the multiplicands   \
         was (bit B), we know that the msb of the product is at         \
         either 2B or 2B-1.  */                                         \
      _FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_wide_prod, (wfracbits) - 1,        \
                      2 * (wfracbits));                                 \
      R##_f = _FP_MUL_MEAT_1_wide_prod_f0;                              \
    }                                                                   \
  while (0)
194 | |
/* Finally, a simple widening multiply that needs no 2W primitive.
   Each operand is split into half-words, the four half-word products
   are formed, and they are recombined into the two-word result
   R_f1:R_f0.  WFRACBITS is not used.  */

#define _FP_MUL_MEAT_DW_1_hard(wfracbits, R, X, Y)                      \
  do                                                                    \
    {                                                                   \
      _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_xhi;                            \
      _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_xlo;                            \
      _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_yhi;                            \
      _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_ylo;                            \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_1_hard_mid);                     \
                                                                        \
      /* Split each operand into upper and lower half-words.  */        \
      _FP_MUL_MEAT_DW_1_hard_xhi = X##_f >> (_FP_W_TYPE_SIZE / 2);      \
      _FP_MUL_MEAT_DW_1_hard_xlo                                        \
        = X##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE / 2)) - 1);      \
      _FP_MUL_MEAT_DW_1_hard_yhi = Y##_f >> (_FP_W_TYPE_SIZE / 2);      \
      _FP_MUL_MEAT_DW_1_hard_ylo                                        \
        = Y##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE / 2)) - 1);      \
                                                                        \
      /* The four partial products: high*high and low*low go            \
         straight into the result words, the two cross products         \
         into the temporary.  */                                        \
      R##_f1 = _FP_MUL_MEAT_DW_1_hard_xhi * _FP_MUL_MEAT_DW_1_hard_yhi; \
      R##_f0 = _FP_MUL_MEAT_DW_1_hard_xlo * _FP_MUL_MEAT_DW_1_hard_ylo; \
      _FP_MUL_MEAT_DW_1_hard_mid_f0                                     \
        = _FP_MUL_MEAT_DW_1_hard_xhi * _FP_MUL_MEAT_DW_1_hard_ylo;      \
      _FP_MUL_MEAT_DW_1_hard_mid_f1                                     \
        = _FP_MUL_MEAT_DW_1_hard_xlo * _FP_MUL_MEAT_DW_1_hard_yhi;      \
                                                                        \
      /* Sum the cross products.  If the sum wrapped around, the        \
         lost carry corresponds to bit _FP_W_TYPE_SIZE/2 of the         \
         high result word.  */                                          \
      _FP_MUL_MEAT_DW_1_hard_mid_f0 += _FP_MUL_MEAT_DW_1_hard_mid_f1;   \
      if (_FP_MUL_MEAT_DW_1_hard_mid_f0 < _FP_MUL_MEAT_DW_1_hard_mid_f1) \
        R##_f1 += (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE / 2);              \
                                                                        \
      /* The cross sum is weighted by half a word: spread it over       \
         two words and add it to the result.  */                        \
      _FP_MUL_MEAT_DW_1_hard_mid_f1                                     \
        = _FP_MUL_MEAT_DW_1_hard_mid_f0 >> (_FP_W_TYPE_SIZE / 2);       \
      _FP_MUL_MEAT_DW_1_hard_mid_f0                                     \
        = _FP_MUL_MEAT_DW_1_hard_mid_f0 << (_FP_W_TYPE_SIZE / 2);       \
      _FP_FRAC_ADD_2 (R, R, _FP_MUL_MEAT_DW_1_hard_mid);                \
    }                                                                   \
  while (0)
231 | |
/* Multiply the fractions of X and Y with _FP_MUL_MEAT_DW_1_hard: the
   two-word product is built in a temporary, shifted right by
   WFRACBITS - 1 with a sticky low bit, and its low word becomes the
   fraction of R.  */
#define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y)                         \
  do                                                                    \
    {                                                                   \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_hard_prod);                       \
                                                                        \
      _FP_MUL_MEAT_DW_1_hard ((wfracbits),                              \
                              _FP_MUL_MEAT_1_hard_prod, X, Y);          \
                                                                        \
      /* Normalize.  */                                                 \
      _FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_hard_prod,                         \
                      (wfracbits) - 1, 2 * (wfracbits));                \
      R##_f = _FP_MUL_MEAT_1_hard_prod_f0;                              \
    }                                                                   \
  while (0)
245 | |
246 | |
247 | /* Division algorithms: */ |
248 | |
/* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
   division immediately.  Give this macro either _FP_DIV_HELP_imm for
   C primitives or _FP_DIV_HELP_ldiv for the ISO function.  Which you
   choose will depend on what the compiler does with divrem4.

   The fraction of X is shifted left in place: by _FP_WFRACBITS_<fs>
   when it is smaller than the fraction of Y (in which case R_e is
   also decremented), otherwise by one bit less.  DOIT is then invoked
   as DOIT (quotient, remainder, X_f, Y_f), and the fraction of R
   becomes the quotient with bit 0 set whenever the remainder is
   nonzero.  */

#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit)                           \
  do                                                                    \
    {                                                                   \
      _FP_W_TYPE _FP_DIV_MEAT_1_imm_quo, _FP_DIV_MEAT_1_imm_rem;        \
                                                                        \
      if (X##_f < Y##_f)                                                \
        {                                                               \
          R##_e--;                                                      \
          X##_f <<= _FP_WFRACBITS_##fs;                                 \
        }                                                               \
      else                                                              \
        X##_f <<= (_FP_WFRACBITS_##fs - 1);                             \
      doit (_FP_DIV_MEAT_1_imm_quo, _FP_DIV_MEAT_1_imm_rem,             \
            X##_f, Y##_f);                                              \
      R##_f = _FP_DIV_MEAT_1_imm_quo | (_FP_DIV_MEAT_1_imm_rem != 0);   \
    }                                                                   \
  while (0)
265 | |
/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
   that may be useful in this situation.  The first macro below is for
   a primitive that requires normalization, the second for one that
   does not.  Look for UDIV_NEEDS_NORMALIZATION to tell which your
   machine needs.  */

/* Divide with a udiv_qrnnd that needs a normalized divisor.  The
   fraction of R becomes the quotient with bit 0 set whenever the
   remainder is nonzero; R_e is decremented when the fraction of X is
   smaller than the fraction of Y.  */
#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y)                           \
  do                                                                    \
    {                                                                   \
      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_hi;                           \
      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_lo;                           \
      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_quo;                          \
      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_rem;                          \
      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_div;                          \
                                                                        \
      /* Normalize Y -- i.e. make the most significant bit set.  */     \
      _FP_DIV_MEAT_1_udiv_norm_div = Y##_f << _FP_WFRACXBITS_##fs;      \
                                                                        \
      /* Shift X correspondingly high, that is, up one full word        \
         (one bit less when X is not smaller than Y).  */               \
      if (Y##_f <= X##_f)                                               \
        {                                                               \
          _FP_DIV_MEAT_1_udiv_norm_hi = X##_f >> 1;                     \
          _FP_DIV_MEAT_1_udiv_norm_lo = X##_f << (_FP_W_TYPE_SIZE - 1); \
        }                                                               \
      else                                                              \
        {                                                               \
          R##_e--;                                                      \
          _FP_DIV_MEAT_1_udiv_norm_hi = X##_f;                          \
          _FP_DIV_MEAT_1_udiv_norm_lo = 0;                              \
        }                                                               \
                                                                        \
      udiv_qrnnd (_FP_DIV_MEAT_1_udiv_norm_quo,                         \
                  _FP_DIV_MEAT_1_udiv_norm_rem,                         \
                  _FP_DIV_MEAT_1_udiv_norm_hi,                          \
                  _FP_DIV_MEAT_1_udiv_norm_lo,                          \
                  _FP_DIV_MEAT_1_udiv_norm_div);                        \
      /* A nonzero remainder is folded into the sticky low bit.  */     \
      R##_f = (_FP_DIV_MEAT_1_udiv_norm_quo                             \
               | (_FP_DIV_MEAT_1_udiv_norm_rem != 0));                  \
    }                                                                   \
  while (0)
305 | |
/* Divide with a udiv_qrnnd that takes the divisor as is.  The
   fraction of X is widened to a two-word numerator, shifted left by
   _FP_WFRACBITS_<fs> when it is smaller than the fraction of Y (R_e
   is then decremented) and by one bit less otherwise.  The fraction
   of R becomes the quotient with bit 0 set whenever the remainder is
   nonzero.  */
#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y)                                \
  do                                                                    \
    {                                                                   \
      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_hi, _FP_DIV_MEAT_1_udiv_lo;        \
      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_quo, _FP_DIV_MEAT_1_udiv_rem;      \
                                                                        \
      if (Y##_f <= X##_f)                                               \
        {                                                               \
          _FP_DIV_MEAT_1_udiv_hi = X##_f >> (_FP_WFRACXBITS_##fs + 1);  \
          _FP_DIV_MEAT_1_udiv_lo = X##_f << (_FP_WFRACBITS_##fs - 1);   \
        }                                                               \
      else                                                              \
        {                                                               \
          R##_e--;                                                      \
          _FP_DIV_MEAT_1_udiv_hi = X##_f >> _FP_WFRACXBITS_##fs;        \
          _FP_DIV_MEAT_1_udiv_lo = X##_f << _FP_WFRACBITS_##fs;         \
        }                                                               \
      udiv_qrnnd (_FP_DIV_MEAT_1_udiv_quo, _FP_DIV_MEAT_1_udiv_rem,     \
                  _FP_DIV_MEAT_1_udiv_hi, _FP_DIV_MEAT_1_udiv_lo,       \
                  Y##_f);                                               \
      /* A nonzero remainder is folded into the sticky low bit.  */     \
      R##_f = _FP_DIV_MEAT_1_udiv_quo | (_FP_DIV_MEAT_1_udiv_rem != 0); \
    }                                                                   \
  while (0)
328 | |
329 | |
/* Square root algorithms:
   We have just one right now; maybe a Newton approximation should be
   added for those machines where division is fast.

   Q is the bit currently being tried and is halved on every
   iteration until it equals _FP_WORK_ROUND.  On each step the trial
   value T = S + Q is compared with X; when it fits, it is subtracted
   from X, Q is added to R, and S is advanced to T + Q.  X is doubled
   after every step.  Finally, if X is nonzero, _FP_WORK_STICKY is
   set in R, together with _FP_WORK_ROUND when S is smaller than X.
   R, S, X and Q are all modified; T is scratch.  The initial values
   of R and S are supplied by the caller.  */

#define _FP_SQRT_MEAT_1(R, S, T, X, q)                  \
  do                                                    \
    {                                                   \
      for (; (q) != _FP_WORK_ROUND; (q) >>= 1)          \
        {                                               \
          T##_f = S##_f + (q);                          \
          if (T##_f <= X##_f)                           \
            {                                           \
              S##_f = T##_f + (q);                      \
              X##_f -= T##_f;                           \
              R##_f += (q);                             \
            }                                           \
          _FP_FRAC_SLL_1 (X, 1);                        \
        }                                               \
      if (X##_f != 0)                                   \
        {                                               \
          if (S##_f < X##_f)                            \
            R##_f |= _FP_WORK_ROUND;                    \
          R##_f |= _FP_WORK_STICKY;                     \
        }                                               \
    }                                                   \
  while (0)
357 | |
/* Assembly/disassembly for converting to/from integral types.
   No shifting or overflow handling is done here: the fraction word is
   simply assigned to, or from, the integer R.  RSIZE is not used.  */

#define _FP_FRAC_ASSEMBLE_1(r, X, rsize)        ((r) = X##_f)
#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize)     (X##_f = (r))
363 | |
364 | |
/* Convert FP values between word sizes.  Going from one word to one
   word is a plain copy of the fraction of S into D.  */

#define _FP_FRAC_COPY_1_1(D, S)         (D##_f = S##_f)
368 | |
369 | #endif /* !SOFT_FP_OP_1_H */ |
370 | |