| 1 | /* Configure soft-fp for building sqrtf128. Based on sfp-machine.h in |
| 2 | libgcc, with soft-float and other irrelevant parts removed. */ |
| 3 | |
/* CMPtype is the type of the result of a floating point comparison.
   It is declared with machine mode `__libgcc_cmp_return__' and must
   therefore match what GCC uses for that mode on the target.  */
typedef int __gcc_CMPtype
  __attribute__ ((mode (__libgcc_cmp_return__)));
#define CMPtype __gcc_CMPtype
| 8 | |
| 9 | #ifdef __x86_64__ |
/* Word configuration handed to soft-fp on x86-64: _FP_W_TYPE is the
   unsigned working word, _FP_WS_TYPE the signed type of the same
   width, _FP_I_TYPE a signed long long, and _FP_W_TYPE_SIZE the word
   width in bits.  */
# define _FP_W_TYPE       unsigned long long
# define _FP_WS_TYPE      signed long long
# define _FP_I_TYPE       long long
# define _FP_W_TYPE_SIZE  64
| 14 | |
/* Signed and unsigned integer types of GCC machine mode TI.  */
typedef int TItype __attribute__ ((mode (TI)));
typedef unsigned int UTItype __attribute__ ((mode (TI)));

/* Width of TItype in bits, as an int.  */
# define TI_BITS ((int) sizeof (TItype) * __CHAR_BIT__)
| 19 | |
/* Quad-precision fraction multiply and divide for the 64-bit word
   configuration.  Both forward to the `_2_' variants of the generic
   soft-fp helpers; the multiply is built on umul_ppmm.  */
# define _FP_MUL_MEAT_Q(R, X, Y) \
  _FP_MUL_MEAT_2_wide (_FP_WFRACBITS_Q, R, X, Y, umul_ppmm)

# define _FP_DIV_MEAT_Q(R, X, Y) \
  _FP_DIV_MEAT_2_udiv (Q, R, X, Y)
| 24 | |
/* NaN fraction initializers for the 64-bit word configuration.  Each
   one is the format's quiet-NaN bit; the E and Q formats expand to two
   comma-separated words, the second of which is zero.  */
# define _FP_NANFRAC_S  _FP_QNANBIT_S
# define _FP_NANFRAC_D  _FP_QNANBIT_D
# define _FP_NANFRAC_E  _FP_QNANBIT_E, 0
# define _FP_NANFRAC_Q  _FP_QNANBIT_Q, 0
| 29 | |
/* Number of bits FP_TRAPPING_EXCEPTIONS shifts `_fcw' to the right
   before masking the result with FP_EX_ALL.  */
# define FP_EX_SHIFT 7

/* Rounding-mode encodings.  FP_ROUNDMODE evaluates to
   `_fcw & FP_RND_MASK', so every value lies inside FP_RND_MASK.  */
# define FP_RND_MASK     0x6000
# define FP_RND_NEAREST  0
# define FP_RND_MINF     0x2000
# define FP_RND_PINF     0x4000
# define FP_RND_ZERO     0x6000

/* Declare the control-word variable `_fcw', preset to round-to-nearest.
   The terminating semicolon is part of the expansion.  */
# define _FP_DECL_EX \
  unsigned int _fcw __attribute__ ((unused)) = FP_RND_NEAREST;
| 41 | |
/* Mnemonic prefix for the control-register store below: "v" when the
   compiler targets AVX, empty otherwise.  */
# if defined (__AVX__)
#  define AVX_INSN_PREFIX "v"
# else
#  define AVX_INSN_PREFIX ""
# endif

/* Store the current control/status word into `_fcw' with the
   (v)stmxcsr instruction, so that FP_ROUNDMODE and
   FP_TRAPPING_EXCEPTIONS see the live settings.  */
# define FP_INIT_ROUNDMODE \
  do \
    { \
      __asm__ __volatile__ (AVX_INSN_PREFIX "stmxcsr\t%0" \
                            : "=m" (_fcw)); \
    } \
  while (0)
| 52 | #else |
/* Word configuration handed to soft-fp when not building for x86-64:
   _FP_W_TYPE is the unsigned working word, _FP_WS_TYPE the signed type
   of the same width, _FP_I_TYPE a plain int, and _FP_W_TYPE_SIZE the
   word width in bits.  */
# define _FP_W_TYPE       unsigned int
# define _FP_WS_TYPE      signed int
# define _FP_I_TYPE       int
# define _FP_W_TYPE_SIZE  32
| 57 | |
/* Four-word addition with carry propagation:
     (r3,r2,r1,r0) = (x3,x2,x1,x0) + (y3,y2,y1,y0)
   Word 0 is the least significant one: it gets the plain `add', the
   higher words are chained with `adc'.  Each x operand is tied to the
   corresponding result register through a matching constraint, and the
   leading `%' marks it as commutative with the y operand that follows.
   The `{att|intel}' groups supply both assembler dialects.  */
# define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  __asm__ ("add{l} {%11,%3|%3,%11}\n\t" \
           "adc{l} {%9,%2|%2,%9}\n\t" \
           "adc{l} {%7,%1|%1,%7}\n\t" \
           "adc{l} {%5,%0|%0,%5}" \
           : "=r" ((USItype) (r3)), "=&r" ((USItype) (r2)), \
             "=&r" ((USItype) (r1)), "=&r" ((USItype) (r0)) \
           : "%0" ((USItype) (x3)), "g" ((USItype) (y3)), \
             "%1" ((USItype) (x2)), "g" ((USItype) (y2)), \
             "%2" ((USItype) (x1)), "g" ((USItype) (y1)), \
             "%3" ((USItype) (x0)), "g" ((USItype) (y0)))
/* Three-word addition with carry propagation:
     (r2,r1,r0) = (x2,x1,x0) + (y2,y1,y0)
   Word 0 is the least significant one (plain `add'); the two higher
   words use `adc'.  The x operands are tied to the result registers by
   matching constraints and flagged commutative (`%') with their y
   partner.  */
# define __FP_FRAC_ADD_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  __asm__ ("add{l} {%8,%2|%2,%8}\n\t" \
           "adc{l} {%6,%1|%1,%6}\n\t" \
           "adc{l} {%4,%0|%0,%4}" \
           : "=r" ((USItype) (r2)), "=&r" ((USItype) (r1)), \
             "=&r" ((USItype) (r0)) \
           : "%0" ((USItype) (x2)), "g" ((USItype) (y2)), \
             "%1" ((USItype) (x1)), "g" ((USItype) (y1)), \
             "%2" ((USItype) (x0)), "g" ((USItype) (y0)))
/* Four-word subtraction with borrow propagation:
     (r3,r2,r1,r0) = (x3,x2,x1,x0) - (y3,y2,y1,y0)
   Word 0 is the least significant one: it gets the plain `sub', the
   higher words are chained with `sbb'.  Each x operand is tied to the
   corresponding result register through a matching constraint; unlike
   the addition macros there is no `%' modifier, since the operands of
   a subtraction cannot be swapped.  */
# define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  __asm__ ("sub{l} {%11,%3|%3,%11}\n\t" \
           "sbb{l} {%9,%2|%2,%9}\n\t" \
           "sbb{l} {%7,%1|%1,%7}\n\t" \
           "sbb{l} {%5,%0|%0,%5}" \
           : "=r" ((USItype) (r3)), "=&r" ((USItype) (r2)), \
             "=&r" ((USItype) (r1)), "=&r" ((USItype) (r0)) \
           : "0" ((USItype) (x3)), "g" ((USItype) (y3)), \
             "1" ((USItype) (x2)), "g" ((USItype) (y2)), \
             "2" ((USItype) (x1)), "g" ((USItype) (y1)), \
             "3" ((USItype) (x0)), "g" ((USItype) (y0)))
/* Three-word subtraction with borrow propagation:
     (r2,r1,r0) = (x2,x1,x0) - (y2,y1,y0)
   Word 0 is the least significant one (plain `sub'); the two higher
   words use `sbb'.  The x operands are tied to the result registers by
   matching constraints.  */
# define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  __asm__ ("sub{l} {%8,%2|%2,%8}\n\t" \
           "sbb{l} {%6,%1|%1,%6}\n\t" \
           "sbb{l} {%4,%0|%0,%4}" \
           : "=r" ((USItype) (r2)), "=&r" ((USItype) (r1)), \
             "=&r" ((USItype) (r0)) \
           : "0" ((USItype) (x2)), "g" ((USItype) (y2)), \
             "1" ((USItype) (x1)), "g" ((USItype) (y1)), \
             "2" ((USItype) (x0)), "g" ((USItype) (y0)))
/* Add the single word I to the four-word value (x3,x2,x1,x0) in place.
   I is added to x0, the least significant word, and the carry is
   rippled upwards by adding zero with `adc' to x1, x2 and x3.  All
   four x operands are read-write (`+') register operands.  */
# define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i) \
  __asm__ ("add{l} {%4,%3|%3,%4}\n\t" \
           "adc{l} {$0,%2|%2,0}\n\t" \
           "adc{l} {$0,%1|%1,0}\n\t" \
           "adc{l} {$0,%0|%0,0}" \
           : "+r" ((USItype) (x3)), "+&r" ((USItype) (x2)), \
             "+&r" ((USItype) (x1)), "+&r" ((USItype) (x0)) \
           : "g" ((USItype) (i)))
| 128 | |
| 129 | |
/* Fraction multiply for the 32-bit word configuration.  Single, double
   and quad precision forward to the `_1_', `_2_' and `_4_' wide soft-fp
   helpers respectively, all built on umul_ppmm.  */
# define _FP_MUL_MEAT_S(R, X, Y) \
  _FP_MUL_MEAT_1_wide (_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
# define _FP_MUL_MEAT_D(R, X, Y) \
  _FP_MUL_MEAT_2_wide (_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
# define _FP_MUL_MEAT_Q(R, X, Y) \
  _FP_MUL_MEAT_4_wide (_FP_WFRACBITS_Q, R, X, Y, umul_ppmm)

/* Fraction divide: single precision uses the one-word loop helper,
   double and quad the two- and four-word udiv helpers.  */
# define _FP_DIV_MEAT_S(R, X, Y) \
  _FP_DIV_MEAT_1_loop (S, R, X, Y)
# define _FP_DIV_MEAT_D(R, X, Y) \
  _FP_DIV_MEAT_2_udiv (D, R, X, Y)
# define _FP_DIV_MEAT_Q(R, X, Y) \
  _FP_DIV_MEAT_4_udiv (Q, R, X, Y)
| 140 | |
/* NaN fraction initializers for the 32-bit word configuration.  Each
   one starts with the format's quiet-NaN bit, followed by as many zero
   words as are needed to fill the fraction: none for S, one for D and
   three for E and Q.  */
# define _FP_NANFRAC_S  _FP_QNANBIT_S
# define _FP_NANFRAC_D  _FP_QNANBIT_D, 0
/* Even if XFmode occupies only 12 bytes, it has to be padded to
   16 bytes because soft-fp emulation is done in 16 bytes.  */
# define _FP_NANFRAC_E  _FP_QNANBIT_E, 0, 0, 0
# define _FP_NANFRAC_Q  _FP_QNANBIT_Q, 0, 0, 0
| 147 | |
/* Number of bits FP_TRAPPING_EXCEPTIONS shifts `_fcw' to the right
   before masking the result with FP_EX_ALL; none are needed here.  */
# define FP_EX_SHIFT 0

/* Rounding-mode encodings.  FP_ROUNDMODE evaluates to
   `_fcw & FP_RND_MASK', so every value lies inside FP_RND_MASK.  */
# define FP_RND_MASK     0xc00
# define FP_RND_NEAREST  0
# define FP_RND_MINF     0x400
# define FP_RND_PINF     0x800
# define FP_RND_ZERO     0xc00

/* Declare the 16-bit control-word variable `_fcw', preset to
   round-to-nearest.  The terminating semicolon is part of the
   expansion.  */
# define _FP_DECL_EX \
  unsigned short _fcw __attribute__ ((unused)) = FP_RND_NEAREST;
| 159 | |
/* Store the current control word into `_fcw' with the fnstcw
   instruction, so that FP_ROUNDMODE and FP_TRAPPING_EXCEPTIONS see the
   live settings.  */
# define FP_INIT_ROUNDMODE \
  do \
    { \
      __asm__ __volatile__ ("fnstcw\t%0" \
                            : "=m" (_fcw)); \
    } \
  while (0)
| 164 | #endif |
| 165 | |
/* NaN policy switches consumed by the generic soft-fp code (their
   exact meaning is defined there): _FP_KEEPNANFRACP is enabled and
   _FP_QNANNEGATEDP is disabled.  */
#define _FP_KEEPNANFRACP  1
#define _FP_QNANNEGATEDP  0

/* NaN sign for each format; 1 for all of them.  */
#define _FP_NANSIGN_S  1
#define _FP_NANSIGN_D  1
#define _FP_NANSIGN_E  1
#define _FP_NANSIGN_Q  1
| 173 | |
/* Choose which of the two NaN operands X and Y becomes the result R.

   Here is something Intel misdesigned: the specs don't define the case
   where we have two NaNs with the same mantissa but different signs.
   Different operations pick up different NaNs.

   The operand with the larger fraction wins.  When the fractions are
   equal, X wins for OP '+' and '*', and Y wins for every other OP.
   The sign and fraction of the winner are copied into R, and R is
   classified as FP_CLS_NAN.

   FS is not used.  WC is the word-count suffix used to select the
   _FP_FRAC_GT_*, _FP_FRAC_EQ_* and _FP_FRAC_COPY_* helpers.  OP is a
   character constant naming the operation; it is parenthesized so that
   an arbitrary expression (for instance a conditional expression) can
   be passed without being re-associated by the comparison.  It may be
   evaluated up to twice.  */
#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
  do \
    { \
      if (_FP_FRAC_GT_##wc (X, Y) \
          || (_FP_FRAC_EQ_##wc (X, Y) \
              && ((OP) == '+' || (OP) == '*'))) \
        { \
          R##_s = X##_s; \
          _FP_FRAC_COPY_##wc (R, X); \
        } \
      else \
        { \
          R##_s = Y##_s; \
          _FP_FRAC_COPY_##wc (R, Y); \
        } \
      R##_c = FP_CLS_NAN; \
    } \
  while (0)
| 192 | |
/* Exception flag bits, one bit per exception.  */
#define FP_EX_INVALID    0x01
#define FP_EX_DENORM     0x02
#define FP_EX_DIVZERO    0x04
#define FP_EX_OVERFLOW   0x08
#define FP_EX_UNDERFLOW  0x10
#define FP_EX_INEXACT    0x20

/* Union of all the exception flags above.  */
#define FP_EX_ALL \
  (FP_EX_INVALID \
   | FP_EX_DENORM \
   | FP_EX_DIVZERO \
   | FP_EX_OVERFLOW \
   | FP_EX_UNDERFLOW \
   | FP_EX_INEXACT)
| 202 | |
/* Out-of-line handler for the exception flags (FP_EX_* bits) passed
   in its argument; defined elsewhere.  */
void __sfp_handle_exceptions (int);

/* Hand the flags accumulated in `_fex' to __sfp_handle_exceptions when
   any are set; `_fex' being zero is hinted as the expected case.

   The expansion deliberately does NOT end in a semicolon: the caller
   supplies it, which keeps `if (c) FP_HANDLE_EXCEPTIONS; else ...'
   well-formed.  */
#define FP_HANDLE_EXCEPTIONS \
  do \
    { \
      if (__builtin_expect (_fex, 0)) \
        __sfp_handle_exceptions (_fex); \
    } \
  while (0)
| 210 | |
/* Exceptions whose bit is clear in `_fcw', reported as FP_EX_* flags:
   the complemented control word is shifted right by FP_EX_SHIFT and
   restricted to FP_EX_ALL.  */
#define FP_TRAPPING_EXCEPTIONS (FP_EX_ALL & (~_fcw >> FP_EX_SHIFT))

/* Rounding-mode field of `_fcw'; compares against the FP_RND_*
   values.  */
#define FP_ROUNDMODE (FP_RND_MASK & _fcw)

/* Tell soft-fp that tininess is detected after rounding.  */
#define _FP_TININESS_AFTER_ROUNDING 1
| 216 | |