op-2.h source code [glibc/soft-fp/op-2.h]

1	/ Software floating-point emulation.*
2	Basic two-word fraction declaration and manipulation.
3	Copyright (C) 1997-2023 Free Software Foundation, Inc.
4	This file is part of the GNU C Library.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	In addition to the permissions in the GNU Lesser General Public
12	License, the Free Software Foundation gives you unlimited
13	permission to link the compiled version of this file into
14	combinations with other programs, and to distribute those
15	combinations without any restriction coming from the use of this
16	file. (The Lesser General Public License restrictions do apply in
17	other respects; for example, they cover modification of the file,
18	and distribution when not linked into a combine executable.)
19
20	The GNU C Library is distributed in the hope that it will be useful,
21	but WITHOUT ANY WARRANTY; without even the implied warranty of
22	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23	Lesser General Public License for more details.
24
25	You should have received a copy of the GNU Lesser General Public
26	License along with the GNU C Library; if not, see
27	<https://www.gnu.org/licenses/>. /*
28
29	#ifndef SOFT_FP_OP_2_H
30	#define SOFT_FP_OP_2_H 1
31
32	#define _FP_FRAC_DECL_2(X) \
33	_FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT
34	#define _FP_FRAC_COPY_2(D, S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
35	#define _FP_FRAC_SET_2(X, I) __FP_FRAC_SET_2 (X, I)
36	#define _FP_FRAC_HIGH_2(X) (X##_f1)
37	#define _FP_FRAC_LOW_2(X) (X##_f0)
38	#define _FP_FRAC_WORD_2(X, w) (X##_f##w)
39
40	#define _FP_FRAC_SLL_2(X, N) \
41	(void) (((N) < _FP_W_TYPE_SIZE) \
42	? ({ \
43	if (__builtin_constant_p (N) && (N) == 1) \
44	{ \
45	X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
46	X##_f0 += X##_f0; \
47	} \
48	else \
49	{ \
50	X##_f1 = X##_f1 << (N) \| X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
51	X##_f0 <<= (N); \
52	} \
53	0; \
54	}) \
55	: ({ \
56	X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
57	X##_f0 = 0; \
58	}))
59
60
61	#define _FP_FRAC_SRL_2(X, N) \
62	(void) (((N) < _FP_W_TYPE_SIZE) \
63	? ({ \
64	X##_f0 = X##_f0 >> (N) \| X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
65	X##_f1 >>= (N); \
66	}) \
67	: ({ \
68	X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
69	X##_f1 = 0; \
70	}))
71
72	/ Right shift with sticky-lsb. /
73	#define _FP_FRAC_SRST_2(X, S, N, sz) \
74	(void) (((N) < _FP_W_TYPE_SIZE) \
75	? ({ \
76	S = (__builtin_constant_p (N) && (N) == 1 \
77	? X##_f0 & 1 \
78	: (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
79	X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) \| X##_f0 >> (N)); \
80	X##_f1 >>= (N); \
81	}) \
82	: ({ \
83	S = ((((N) == _FP_W_TYPE_SIZE \
84	? 0 \
85	: (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
86	\| X##_f0) != 0); \
87	X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
88	X##_f1 = 0; \
89	}))
90
91	#define _FP_FRAC_SRS_2(X, N, sz) \
92	(void) (((N) < _FP_W_TYPE_SIZE) \
93	? ({ \
94	X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) \| X##_f0 >> (N) \
95	\| (__builtin_constant_p (N) && (N) == 1 \
96	? X##_f0 & 1 \
97	: (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
98	X##_f1 >>= (N); \
99	}) \
100	: ({ \
101	X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) \
102	\| ((((N) == _FP_W_TYPE_SIZE \
103	? 0 \
104	: (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
105	\| X##_f0) != 0)); \
106	X##_f1 = 0; \
107	}))
108
109	#define _FP_FRAC_ADDI_2(X, I) \
110	__FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
111
112	#define _FP_FRAC_ADD_2(R, X, Y) \
113	__FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
114
115	#define _FP_FRAC_SUB_2(R, X, Y) \
116	__FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
117
118	#define _FP_FRAC_DEC_2(X, Y) \
119	__FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
120
121	#define _FP_FRAC_CLZ_2(R, X) \
122	do \
123	{ \
124	if (X##_f1) \
125	__FP_CLZ ((R), X##_f1); \
126	else \
127	{ \
128	__FP_CLZ ((R), X##_f0); \
129	(R) += _FP_W_TYPE_SIZE; \
130	} \
131	} \
132	while (0)
133
134	/ Predicates. /
135	#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE) X##_f1 < 0)
136	#define _FP_FRAC_ZEROP_2(X) ((X##_f1 \| X##_f0) == 0)
137	#define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
138	#define _FP_FRAC_CLEAR_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
139	#define _FP_FRAC_HIGHBIT_DW_2(fs, X) \
140	(_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
141	#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
142	#define _FP_FRAC_GT_2(X, Y) \
143	(X##_f1 > Y##_f1 \|\| (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
144	#define _FP_FRAC_GE_2(X, Y) \
145	(X##_f1 > Y##_f1 \|\| (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
146
147	#define _FP_ZEROFRAC_2 0, 0
148	#define _FP_MINFRAC_2 0, 1
149	#define _FP_MAXFRAC_2 (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
150
151	/ Internals. /
152
153	#define __FP_FRAC_SET_2(X, I1, I0) (X##_f0 = I0, X##_f1 = I1)
154
155	#define __FP_CLZ_2(R, xh, xl) \
156	do \
157	{ \
158	if (xh) \
159	__FP_CLZ ((R), xh); \
160	else \
161	{ \
162	__FP_CLZ ((R), xl); \
163	(R) += _FP_W_TYPE_SIZE; \
164	} \
165	} \
166	while (0)
167
168	#if 0
169
170	# ifndef __FP_FRAC_ADDI_2
171	# define __FP_FRAC_ADDI_2(xh, xl, i) \
172	(xh += ((xl += i) < i))
173	# endif
174	# ifndef __FP_FRAC_ADD_2
175	# define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
176	(rh = xh + yh + ((rl = xl + yl) < xl))
177	# endif
178	# ifndef __FP_FRAC_SUB_2
179	# define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
180	(rh = xh - yh - ((rl = xl - yl) > xl))
181	# endif
182	# ifndef __FP_FRAC_DEC_2
183	# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
184	do \
185	{ \
186	UWtype __FP_FRAC_DEC_2_t = xl; \
187	xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \
188	} \
189	while (0)
190	# endif
191
192	#else
193
194	# undef __FP_FRAC_ADDI_2
195	# define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa (xh, xl, xh, xl, 0, i)
196	# undef __FP_FRAC_ADD_2
197	# define __FP_FRAC_ADD_2 add_ssaaaa
198	# undef __FP_FRAC_SUB_2
199	# define __FP_FRAC_SUB_2 sub_ddmmss
200	# undef __FP_FRAC_DEC_2
201	# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
202	sub_ddmmss (xh, xl, xh, xl, yh, yl)
203
204	#endif
205
206	/ Unpack the raw bits of a native fp value. Do not classify or*
207	normalize the data. /*
208
209	#define _FP_UNPACK_RAW_2(fs, X, val) \
210	do \
211	{ \
212	union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo; \
213	_FP_UNPACK_RAW_2_flo.flt = (val); \
214	\
215	X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0; \
216	X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1; \
217	X##_e = _FP_UNPACK_RAW_2_flo.bits.exp; \
218	X##_s = _FP_UNPACK_RAW_2_flo.bits.sign; \
219	} \
220	while (0)
221
222	#define _FP_UNPACK_RAW_2_P(fs, X, val) \
223	do \
224	{ \
225	union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo \
226	= (union _FP_UNION_##fs *) (val); \
227	\
228	X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0; \
229	X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1; \
230	X##_e = _FP_UNPACK_RAW_2_P_flo->bits.exp; \
231	X##_s = _FP_UNPACK_RAW_2_P_flo->bits.sign; \
232	} \
233	while (0)
234
235
236	/ Repack the raw bits of a native fp value. /
237
238	#define _FP_PACK_RAW_2(fs, val, X) \
239	do \
240	{ \
241	union _FP_UNION_##fs _FP_PACK_RAW_2_flo; \
242	\
243	_FP_PACK_RAW_2_flo.bits.frac0 = X##_f0; \
244	_FP_PACK_RAW_2_flo.bits.frac1 = X##_f1; \
245	_FP_PACK_RAW_2_flo.bits.exp = X##_e; \
246	_FP_PACK_RAW_2_flo.bits.sign = X##_s; \
247	\
248	(val) = _FP_PACK_RAW_2_flo.flt; \
249	} \
250	while (0)
251
252	#define _FP_PACK_RAW_2_P(fs, val, X) \
253	do \
254	{ \
255	union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo \
256	= (union _FP_UNION_##fs *) (val); \
257	\
258	_FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0; \
259	_FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1; \
260	_FP_PACK_RAW_2_P_flo->bits.exp = X##_e; \
261	_FP_PACK_RAW_2_P_flo->bits.sign = X##_s; \
262	} \
263	while (0)
264
265
266	/ Multiplication algorithms: /
267
268	/ Given a 1W * 1W => 2W primitive, do the extended multiplication. /
269
270	#define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
271	do \
272	{ \
273	_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b); \
274	_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c); \
275	\
276	doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \
277	X##_f0, Y##_f0); \
278	doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \
279	X##_f0, Y##_f1); \
280	doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \
281	X##_f1, Y##_f0); \
282	doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
283	X##_f1, Y##_f1); \
284	\
285	__FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
286	_FP_FRAC_WORD_4 (R, 1), 0, \
287	_FP_MUL_MEAT_DW_2_wide_b_f1, \
288	_FP_MUL_MEAT_DW_2_wide_b_f0, \
289	_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
290	_FP_FRAC_WORD_4 (R, 1)); \
291	__FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
292	_FP_FRAC_WORD_4 (R, 1), 0, \
293	_FP_MUL_MEAT_DW_2_wide_c_f1, \
294	_FP_MUL_MEAT_DW_2_wide_c_f0, \
295	_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
296	_FP_FRAC_WORD_4 (R, 1)); \
297	} \
298	while (0)
299
300	#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
301	do \
302	{ \
303	_FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z); \
304	\
305	_FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z, \
306	X, Y, doit); \
307	\
308	/* Normalize since we know where the msb of the multiplicands \
309	were (bit B), we know that the msb of the of the product is \
310	at either 2B or 2B-1. */ \
311	_FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1, \
312	2*(wfracbits)); \
313	R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0); \
314	R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1); \
315	} \
316	while (0)
317
318	/ Given a 1W * 1W => 2W primitive, do the extended multiplication.*
319	Do only 3 multiplications instead of four. This one is for machines
320	where multiplication is much more expensive than subtraction. /*
321
322	#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
323	do \
324	{ \
325	_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \
326	_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \
327	_FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \
328	int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \
329	int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \
330	\
331	_FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \
332	_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
333	= _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \
334	_FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \
335	_FP_MUL_MEAT_DW_2_wide_3mul_c2 \
336	= _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \
337	doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \
338	X##_f0, Y##_f0); \
339	doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \
340	_FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \
341	_FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
342	doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
343	_FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \
344	\
345	_FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \
346	&= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \
347	_FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \
348	&= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \
349	__FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
350	_FP_FRAC_WORD_4 (R, 1), \
351	(_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
352	& _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \
353	_FP_MUL_MEAT_DW_2_wide_3mul_d, \
354	0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
355	__FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
356	_FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \
357	__FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
358	_FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
359	__FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
360	_FP_FRAC_WORD_4 (R, 1), \
361	0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \
362	_FP_FRAC_WORD_4 (R, 0)); \
363	__FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
364	_FP_FRAC_WORD_4 (R, 1), 0, \
365	_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
366	_FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \
367	__FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
368	_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
369	_FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \
370	_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
371	} \
372	while (0)
373
374	#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
375	do \
376	{ \
377	_FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z); \
378	\
379	_FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits), \
380	_FP_MUL_MEAT_2_wide_3mul_z, \
381	X, Y, doit); \
382	\
383	/* Normalize since we know where the msb of the multiplicands \
384	were (bit B), we know that the msb of the of the product is \
385	at either 2B or 2B-1. */ \
386	_FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z, \
387	(wfracbits)-1, 2*(wfracbits)); \
388	R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0); \
389	R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1); \
390	} \
391	while (0)
392
393	#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
394	do \
395	{ \
396	_FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2]; \
397	_FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2]; \
398	_FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0; \
399	_FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1; \
400	_FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0; \
401	_FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1; \
402	\
403	mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x, \
404	_FP_MUL_MEAT_DW_2_gmp_y, 2); \
405	} \
406	while (0)
407
408	#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
409	do \
410	{ \
411	_FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z); \
412	\
413	_FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y); \
414	\
415	/* Normalize since we know where the msb of the multiplicands \
416	were (bit B), we know that the msb of the of the product is \
417	at either 2B or 2B-1. */ \
418	_FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1, \
419	2*(wfracbits)); \
420	R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0]; \
421	R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1]; \
422	} \
423	while (0)
424
425	/ Do at most 120x120=240 bits multiplication using double floating*
426	point multiplication. This is useful if floating point
427	multiplication has much bigger throughput than integer multiply.
428	It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
429	between 106 and 120 only.
430	Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
431	SETFETZ is a macro which will disable all FPU exceptions and set rounding
432	towards zero, RESETFE should optionally reset it back. /*
433
434	#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
435	do \
436	{ \
437	static const double _const[] = \
438	{ \
439	/* 2^-24 */ 5.9604644775390625e-08, \
440	/* 2^-48 */ 3.5527136788005009e-15, \
441	/* 2^-72 */ 2.1175823681357508e-22, \
442	/* 2^-96 */ 1.2621774483536189e-29, \
443	/* 2^28 */ 2.68435456e+08, \
444	/* 2^4 */ 1.600000e+01, \
445	/* 2^-20 */ 9.5367431640625e-07, \
446	/* 2^-44 */ 5.6843418860808015e-14, \
447	/* 2^-68 */ 3.3881317890172014e-21, \
448	/* 2^-92 */ 2.0194839173657902e-28, \
449	/* 2^-116 */ 1.2037062152420224e-35 \
450	}; \
451	double _a240, _b240, _c240, _d240, _e240, _f240, \
452	_g240, _h240, _i240, _j240, _k240; \
453	union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
454	_p240, _q240, _r240, _s240; \
455	UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
456	\
457	_FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120, \
458	"wfracbits out of range"); \
459	\
460	setfetz; \
461	\
462	_e240 = (double) (long) (X##_f0 & 0xffffff); \
463	_j240 = (double) (long) (Y##_f0 & 0xffffff); \
464	_d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff); \
465	_i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff); \
466	_c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) \| (X##_f0 >> 48)); \
467	_h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) \| (Y##_f0 >> 48)); \
468	_b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff); \
469	_g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff); \
470	_a240 = (double) (long) (X##_f1 >> 32); \
471	_f240 = (double) (long) (Y##_f1 >> 32); \
472	_e240 *= _const[3]; \
473	_j240 *= _const[3]; \
474	_d240 *= _const[2]; \
475	_i240 *= _const[2]; \
476	_c240 *= _const[1]; \
477	_h240 *= _const[1]; \
478	_b240 *= _const[0]; \
479	_g240 *= _const[0]; \
480	_s240.d = _e240*_j240; \
481	_r240.d = _d240_j240 + _e240_i240; \
482	_q240.d = _c240_j240 + _d240_i240 + _e240*_h240; \
483	_p240.d = _b240_j240 + _c240_i240 + _d240_h240 + _e240_g240; \
484	_o240.d = _a240_j240 + _b240_i240 + _c240_h240 + _d240_g240 + _e240*_f240; \
485	_n240.d = _a240_i240 + _b240_h240 + _c240_g240 + _d240_f240; \
486	_m240.d = _a240_h240 + _b240_g240 + _c240*_f240; \
487	_l240.d = _a240_g240 + _b240_f240; \
488	_k240 = _a240*_f240; \
489	_r240.d += _s240.d; \
490	_q240.d += _r240.d; \
491	_p240.d += _q240.d; \
492	_o240.d += _p240.d; \
493	_n240.d += _o240.d; \
494	_m240.d += _n240.d; \
495	_l240.d += _m240.d; \
496	_k240 += _l240.d; \
497	_s240.d -= ((_const[10]+_s240.d)-_const[10]); \
498	_r240.d -= ((_const[9]+_r240.d)-_const[9]); \
499	_q240.d -= ((_const[8]+_q240.d)-_const[8]); \
500	_p240.d -= ((_const[7]+_p240.d)-_const[7]); \
501	_o240.d += _const[7]; \
502	_n240.d += _const[6]; \
503	_m240.d += _const[5]; \
504	_l240.d += _const[4]; \
505	if (_s240.d != 0.0) \
506	_y240 = 1; \
507	if (_r240.d != 0.0) \
508	_y240 = 1; \
509	if (_q240.d != 0.0) \
510	_y240 = 1; \
511	if (_p240.d != 0.0) \
512	_y240 = 1; \
513	_t240 = (DItype) _k240; \
514	_u240 = _l240.i; \
515	_v240 = _m240.i; \
516	_w240 = _n240.i; \
517	_x240 = _o240.i; \
518	R##_f1 = ((_t240 << (128 - (wfracbits - 1))) \
519	\| ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104))); \
520	R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
521	\| ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
522	\| ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
523	\| ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
524	\| _y240); \
525	resetfe; \
526	} \
527	while (0)
528
529	/ Division algorithms: /
530
531	#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
532	do \
533	{ \
534	_FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \
535	_FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \
536	_FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \
537	_FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \
538	_FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \
539	_FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \
540	_FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \
541	if (_FP_FRAC_GE_2 (X, Y)) \
542	{ \
543	_FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \
544	_FP_DIV_MEAT_2_udiv_n_f1 \
545	= X##_f1 << (_FP_W_TYPE_SIZE - 1) \| X##_f0 >> 1; \
546	_FP_DIV_MEAT_2_udiv_n_f0 \
547	= X##_f0 << (_FP_W_TYPE_SIZE - 1); \
548	} \
549	else \
550	{ \
551	R##_e--; \
552	_FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \
553	_FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \
554	_FP_DIV_MEAT_2_udiv_n_f0 = 0; \
555	} \
556	\
557	/* Normalize, i.e. make the most significant bit of the \
558	denominator set. */ \
559	_FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
560	\
561	udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \
562	_FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \
563	Y##_f1); \
564	umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \
565	R##_f1, Y##_f0); \
566	_FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \
567	if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
568	{ \
569	R##_f1--; \
570	_FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
571	_FP_DIV_MEAT_2_udiv_r); \
572	if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
573	&& _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
574	_FP_DIV_MEAT_2_udiv_r)) \
575	{ \
576	R##_f1--; \
577	_FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
578	_FP_DIV_MEAT_2_udiv_r); \
579	} \
580	} \
581	_FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \
582	\
583	if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \
584	{ \
585	/* This is a special case, not an optimization \
586	(_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \
587	As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \
588	R##_f0 can be either (UWtype)-1 or (UWtype)-2. But as we \
589	know what kind of bits it is (sticky, guard, round), \
590	we don't care. We also don't care what the reminder is, \
591	because the guard bit will be set anyway. -jj */ \
592	R##_f0 = -1; \
593	} \
594	else \
595	{ \
596	udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \
597	_FP_DIV_MEAT_2_udiv_r_f1, \
598	_FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \
599	umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \
600	_FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \
601	_FP_DIV_MEAT_2_udiv_r_f0 = 0; \
602	if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
603	_FP_DIV_MEAT_2_udiv_r)) \
604	{ \
605	R##_f0--; \
606	_FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
607	_FP_DIV_MEAT_2_udiv_r); \
608	if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
609	&& _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
610	_FP_DIV_MEAT_2_udiv_r)) \
611	{ \
612	R##_f0--; \
613	_FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
614	_FP_DIV_MEAT_2_udiv_r); \
615	} \
616	} \
617	if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \
618	_FP_DIV_MEAT_2_udiv_m)) \
619	R##_f0 \|= _FP_WORK_STICKY; \
620	} \
621	} \
622	while (0)
623
624
625	/ Square root algorithms:*
626	We have just one right now, maybe Newton approximation
627	should be added for those machines where division is fast. /*
628
629	#define _FP_SQRT_MEAT_2(R, S, T, X, q) \
630	do \
631	{ \
632	while (q) \
633	{ \
634	T##_f1 = S##_f1 + (q); \
635	if (T##_f1 <= X##_f1) \
636	{ \
637	S##_f1 = T##_f1 + (q); \
638	X##_f1 -= T##_f1; \
639	R##_f1 += (q); \
640	} \
641	_FP_FRAC_SLL_2 (X, 1); \
642	(q) >>= 1; \
643	} \
644	(q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
645	while ((q) != _FP_WORK_ROUND) \
646	{ \
647	T##_f0 = S##_f0 + (q); \
648	T##_f1 = S##_f1; \
649	if (T##_f1 < X##_f1 \
650	\|\| (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
651	{ \
652	S##_f0 = T##_f0 + (q); \
653	S##_f1 += (T##_f0 > S##_f0); \
654	_FP_FRAC_DEC_2 (X, T); \
655	R##_f0 += (q); \
656	} \
657	_FP_FRAC_SLL_2 (X, 1); \
658	(q) >>= 1; \
659	} \
660	if (X##_f0 \| X##_f1) \
661	{ \
662	if (S##_f1 < X##_f1 \
663	\|\| (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
664	R##_f0 \|= _FP_WORK_ROUND; \
665	R##_f0 \|= _FP_WORK_STICKY; \
666	} \
667	} \
668	while (0)
669
670
671	/ Assembly/disassembly for converting to/from integral types.*
672	No shifting or overflow handled here. /*
673
674	#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
675	(void) (((rsize) <= _FP_W_TYPE_SIZE) \
676	? ({ (r) = X##_f0; }) \
677	: ({ \
678	(r) = X##_f1; \
679	(r) <<= _FP_W_TYPE_SIZE; \
680	(r) += X##_f0; \
681	}))
682
683	#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
684	do \
685	{ \
686	X##_f0 = (r); \
687	X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE \
688	? 0 \
689	: (r) >> _FP_W_TYPE_SIZE); \
690	} \
691	while (0)
692
693	/ Convert FP values between word sizes. /
694
695	#define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)
696
697	#define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))
698
699	#define _FP_FRAC_COPY_2_2(D, S) _FP_FRAC_COPY_2 (D, S)
700
701	#endif /* !SOFT_FP_OP_2_H */
702

Browse the source code of glibc/soft-fp/op-2.h