| 1 | /* Single-precision 10^x function. | 
|---|
| 2 | Copyright (C) 2020-2022 Free Software Foundation, Inc. | 
|---|
| 3 | This file is part of the GNU C Library. | 
|---|
| 4 |  | 
|---|
| 5 | The GNU C Library is free software; you can redistribute it and/or | 
|---|
| 6 | modify it under the terms of the GNU Lesser General Public | 
|---|
| 7 | License as published by the Free Software Foundation; either | 
|---|
| 8 | version 2.1 of the License, or (at your option) any later version. | 
|---|
| 9 |  | 
|---|
| 10 | The GNU C Library is distributed in the hope that it will be useful, | 
|---|
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|---|
| 13 | Lesser General Public License for more details. | 
|---|
| 14 |  | 
|---|
| 15 | You should have received a copy of the GNU Lesser General Public | 
|---|
| 16 | License along with the GNU C Library; if not, see | 
|---|
| 17 | <https://www.gnu.org/licenses/>.  */ | 
|---|
| 18 |  | 
|---|
| 19 | #include <math.h> | 
|---|
| 20 | #include <math-narrow-eval.h> | 
|---|
| 21 | #include <stdint.h> | 
|---|
| 22 | #include <libm-alias-finite.h> | 
|---|
| 23 | #include <libm-alias-float.h> | 
|---|
| 24 | #include <shlib-compat.h> | 
|---|
| 25 | #include <math-svid-compat.h> | 
|---|
| 26 | #include "math_config.h" | 
|---|
| 27 |  | 
|---|
| 28 | /* | 
|---|
| 29 | EXP2F_TABLE_BITS 5 | 
|---|
| 30 | EXP2F_POLY_ORDER 3 | 
|---|
| 31 |  | 
|---|
| 32 | Max. ULP error: 0.502 (normal range, nearest rounding). | 
|---|
| 33 | Max. relative error: 2^-33.240 (before rounding to float). | 
|---|
| 34 | Wrong count: 169839. | 
|---|
| 35 | Non-nearest ULP error: 1 (rounded ULP error). | 
|---|
| 36 |  | 
|---|
| 37 | Detailed error analysis (for EXP2F_TABLE_BITS=3 thus N=32): | 
|---|
| 38 |  | 
|---|
| 39 | - We first compute z = RN(InvLn10N * x) where | 
|---|
| 40 |  | 
|---|
| 41 | InvLn10N = N*log(10)/log(2) * (1 + theta1) with |theta1| < 2^-54.150 | 
|---|
| 42 |  | 
|---|
| 43 | since z is rounded to nearest: | 
|---|
| 44 |  | 
|---|
| 45 | z = InvLn10N * x * (1 + theta2) with |theta2| < 2^-53 | 
|---|
| 46 |  | 
|---|
| 47 | thus z =  N*log(10)/log(2) * x * (1 + theta3) with |theta3| < 2^-52.463 | 
|---|
| 48 |  | 
|---|
| 49 | - Since |x| < 38 in the main branch, we deduce: | 
|---|
| 50 |  | 
|---|
| 51 | z = N*log(10)/log(2) * x + theta4 with |theta4| < 2^-40.483 | 
|---|
| 52 |  | 
|---|
| 53 | - We then write z = k + r where k is an integer and |r| <= 0.5 (exact) | 
|---|
| 54 |  | 
|---|
| 55 | - We thus have | 
|---|
| 56 |  | 
|---|
| 57 | x = z*log(2)/(N*log(10)) - theta4*log(2)/(N*log(10)) | 
|---|
| 58 | = z*log(2)/(N*log(10)) + theta5 with |theta5| < 2^-47.215 | 
|---|
| 59 |  | 
|---|
| 60 | 10^x = 2^(k/N) * 2^(r/N) * 10^theta5 | 
|---|
| 61 | =  2^(k/N) * 2^(r/N) * (1 + theta6) with |theta6| < 2^-46.011 | 
|---|
| 62 |  | 
|---|
| 63 | - Then 2^(k/N) is approximated by table lookup, the maximal relative error | 
|---|
| 64 | is for (k%N) = 5, with | 
|---|
| 65 |  | 
|---|
| 66 | s = 2^(i/N) * (1 + theta7) with |theta7| < 2^-53.249 | 
|---|
| 67 |  | 
|---|
| 68 | - 2^(r/N) is approximated by a degree-3 polynomial, where the maximal | 
|---|
| 69 | mathematical relative error is 2^-33.243. | 
|---|
| 70 |  | 
|---|
| 71 | - For C[0] * r + C[1], assuming no FMA is used, since |r| <= 0.5 and | 
|---|
| 72 | |C[0]| < 1.694e-6, |C[0] * r| < 8.47e-7, and the absolute error on | 
|---|
| 73 | C[0] * r is bounded by 1/2*ulp(8.47e-7) = 2^-74.  Then we add C[1] with | 
|---|
| 74 | |C[1]| < 0.000235, thus the absolute error on C[0] * r + C[1] is bounded | 
|---|
| 75 | by 2^-65.994 (z is bounded by 0.000236). | 
|---|
| 76 |  | 
|---|
| 77 | - For r2 = r * r, since |r| <= 0.5, we have |r2| <= 0.25 and the absolute | 
|---|
| 78 | error is bounded by 1/4*ulp(0.25) = 2^-56 (the factor 1/4 is because |r2| | 
|---|
| 79 | cannot exceed 1/4, and on the left of 1/4 the distance between two | 
|---|
| 80 | consecutive numbers is 1/4*ulp(1/4)). | 
|---|
| 81 |  | 
|---|
| 82 | - For y = C[2] * r + 1, assuming no FMA is used, since |r| <= 0.5 and | 
|---|
| 83 | |C[2]| < 0.0217, the absolute error on C[2] * r is bounded by 2^-60, | 
|---|
| 84 | and thus the absolute error on C[2] * r + 1 is bounded by 1/2*ulp(1)+2^60 | 
|---|
| 85 | < 2^-52.988, and |y| < 1.01085 (the error bound is better if a FMA is | 
|---|
| 86 | used). | 
|---|
| 87 |  | 
|---|
| 88 | - for z * r2 + y: the absolute error on z is bounded by 2^-65.994, with | 
|---|
| 89 | |z| < 0.000236, and the absolute error on r2 is bounded by 2^-56, with | 
|---|
| 90 | r2 < 0.25, thus |z*r2| < 0.000059, and the absolute error on z * r2 | 
|---|
| 91 | (including the rounding error) is bounded by: | 
|---|
| 92 |  | 
|---|
| 93 | 2^-65.994 * 0.25 + 0.000236 * 2^-56 + 1/2*ulp(0.000059) < 2^-66.429. | 
|---|
| 94 |  | 
|---|
| 95 | Now we add y, with |y| < 1.01085, and error on y bounded by 2^-52.988, | 
|---|
| 96 | thus the absolute error is bounded by: | 
|---|
| 97 |  | 
|---|
| 98 | 2^-66.429 + 2^-52.988 + 1/2*ulp(1.01085) < 2^-51.993 | 
|---|
| 99 |  | 
|---|
| 100 | - Now we convert the error on y into relative error.  Recall that y | 
|---|
| 101 | approximates 2^(r/N), for |r| <= 0.5 and N=32. Thus 2^(-0.5/32) <= y, | 
|---|
| 102 | and the relative error on y is bounded by | 
|---|
| 103 |  | 
|---|
| 104 | 2^-51.993/2^(-0.5/32) < 2^-51.977 | 
|---|
| 105 |  | 
|---|
| 106 | - Taking into account the mathematical relative error of 2^-33.243 we have: | 
|---|
| 107 |  | 
|---|
| 108 | y = 2^(r/N) * (1 + theta8) with |theta8| < 2^-33.242 | 
|---|
| 109 |  | 
|---|
| 110 | - Since we had s = 2^(k/N) * (1 + theta7) with |theta7| < 2^-53.249, | 
|---|
| 111 | after y = y * s we get y = 2^(k/N) * 2^(r/N) * (1 + theta9) | 
|---|
| 112 | with |theta9| < 2^-33.241 | 
|---|
| 113 |  | 
|---|
| 114 | - Finally, taking into account the error theta6 due to the rounding error on | 
|---|
| 115 | InvLn10N, and when multiplying InvLn10N * x, we get: | 
|---|
| 116 |  | 
|---|
| 117 | y = 10^x * (1 + theta10) with |theta10| < 2^-33.240 | 
|---|
| 118 |  | 
|---|
| 119 | - Converting into binary64 ulps, since y < 2^53*ulp(y), the error is at most | 
|---|
| 120 | 2^19.76 ulp(y) | 
|---|
| 121 |  | 
|---|
| 122 | - If the result is a binary32 value in the normal range (i.e., >= 2^-126), | 
|---|
| 123 | then the error is at most 2^-9.24 ulps, i.e., less than 0.00166 (in the | 
|---|
| 124 | subnormal range, the error in ulps might be larger). | 
|---|
| 125 |  | 
|---|
| 126 | Note that this bound is tight, since for x=-0x25.54ac0p0 the final value of | 
|---|
| 127 | y (before conversion to float) differs from 879853 ulps from the correctly | 
|---|
| 128 | rounded value, and 879853 ~ 2^19.7469.  */ | 
|---|
| 129 |  | 
|---|
| 130 | #define N (1 << EXP2F_TABLE_BITS) | 
|---|
| 131 |  | 
|---|
| 132 | #define InvLn10N (0x3.5269e12f346e2p0 * N) /* log(10)/log(2) to nearest */ | 
|---|
| 133 | #define T __exp2f_data.tab | 
|---|
| 134 | #define C __exp2f_data.poly_scaled | 
|---|
| 135 | #define SHIFT __exp2f_data.shift | 
|---|
| 136 |  | 
|---|
| 137 | static inline uint32_t | 
|---|
| 138 | top13 (float x) | 
|---|
| 139 | { | 
|---|
| 140 | return asuint (x) >> 19; | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | float | 
|---|
| 144 | __exp10f (float x) | 
|---|
| 145 | { | 
|---|
| 146 | uint32_t abstop; | 
|---|
| 147 | uint64_t ki, t; | 
|---|
| 148 | double kd, xd, z, r, r2, y, s; | 
|---|
| 149 |  | 
|---|
| 150 | xd = (double) x; | 
|---|
| 151 | abstop = top13 (x) & 0xfff; /* Ignore sign.  */ | 
|---|
| 152 | if (__glibc_unlikely (abstop >= top13 (38.0f))) | 
|---|
| 153 | { | 
|---|
| 154 | /* |x| >= 38 or x is nan.  */ | 
|---|
| 155 | if (asuint (x) == asuint (-INFINITY)) | 
|---|
| 156 | return 0.0f; | 
|---|
| 157 | if (abstop >= top13 (INFINITY)) | 
|---|
| 158 | return x + x; | 
|---|
| 159 | /* 0x26.8826ap0 is the largest value such that 10^x < 2^128.  */ | 
|---|
| 160 | if (x > 0x26.8826ap0f) | 
|---|
| 161 | return __math_oflowf (0); | 
|---|
| 162 | /* -0x2d.278d4p0 is the smallest value such that 10^x > 2^-150.  */ | 
|---|
| 163 | if (x < -0x2d.278d4p0f) | 
|---|
| 164 | return __math_uflowf (0); | 
|---|
| 165 | #if WANT_ERRNO_UFLOW | 
|---|
| 166 | if (x < -0x2c.da7cfp0) | 
|---|
| 167 | return __math_may_uflowf (0); | 
|---|
| 168 | #endif | 
|---|
| 169 | /* the smallest value such that 10^x >= 2^-126 (normal range) | 
|---|
| 170 | is x = -0x25.ee060p0 */ | 
|---|
| 171 | /* we go through here for 2014929 values out of 2060451840 | 
|---|
| 172 | (not counting NaN and infinities, i.e., about 0.1% */ | 
|---|
| 173 | } | 
|---|
| 174 |  | 
|---|
| 175 | /* x*N*Ln10/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */ | 
|---|
| 176 | z = InvLn10N * xd; | 
|---|
| 177 | /* |xd| < 38 thus |z| < 1216 */ | 
|---|
| 178 | #if TOINT_INTRINSICS | 
|---|
| 179 | kd = roundtoint (z); | 
|---|
| 180 | ki = converttoint (z); | 
|---|
| 181 | #else | 
|---|
| 182 | # define SHIFT __exp2f_data.shift | 
|---|
| 183 | kd = math_narrow_eval ((double) (z + SHIFT)); /* Needs to be double.  */ | 
|---|
| 184 | ki = asuint64 (kd); | 
|---|
| 185 | kd -= SHIFT; | 
|---|
| 186 | #endif | 
|---|
| 187 | r = z - kd; | 
|---|
| 188 |  | 
|---|
| 189 | /* 10^x = 10^(k/N) * 10^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1)  */ | 
|---|
| 190 | t = T[ki % N]; | 
|---|
| 191 | t += ki << (52 - EXP2F_TABLE_BITS); | 
|---|
| 192 | s = asdouble (t); | 
|---|
| 193 | z = C[0] * r + C[1]; | 
|---|
| 194 | r2 = r * r; | 
|---|
| 195 | y = C[2] * r + 1; | 
|---|
| 196 | y = z * r2 + y; | 
|---|
| 197 | y = y * s; | 
|---|
| 198 | return (float) y; | 
|---|
| 199 | } | 
|---|
| 200 | #ifndef __exp10f | 
|---|
| 201 | strong_alias (__exp10f, __ieee754_exp10f) | 
|---|
| 202 | libm_alias_finite (__ieee754_exp10f, __exp10f) | 
|---|
| 203 | /* For architectures that already provided exp10f without SVID support, there | 
|---|
| 204 | is no need to add a new version.  */ | 
|---|
| 205 | #if !LIBM_SVID_COMPAT | 
|---|
| 206 | # define EXP10F_VERSION GLIBC_2_26 | 
|---|
| 207 | #else | 
|---|
| 208 | # define EXP10F_VERSION GLIBC_2_32 | 
|---|
| 209 | #endif | 
|---|
| 210 | versioned_symbol (libm, __exp10f, exp10f, EXP10F_VERSION); | 
|---|
| 211 | libm_alias_float_other (__exp10, exp10) | 
|---|
| 212 | #endif | 
|---|
| 213 |  | 
|---|