Skip to content

Conversation

bassiounix
Copy link
Contributor

@bassiounix bassiounix commented Sep 29, 2025

@bassiounix bassiounix added bazel "Peripheral" support tier build system: utils/bazel libc labels Sep 29, 2025 — with Graphite App
@bassiounix bassiounix requested a review from lntue September 29, 2025 23:29
@bassiounix bassiounix marked this pull request as ready for review September 29, 2025 23:29
@llvmbot
Copy link
Member

llvmbot commented Sep 29, 2025

@llvm/pr-subscribers-libc

Author: Muhammad Bassiouni (bassiounix)

Changes

Part of #147386

in preparation for: https://discourse.llvm.org/t/rfc-make-clang-builtin-math-functions-constexpr-with-llvm-libc-to-support-c-23-constexpr-math-functions/86450


Patch is 69.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/161297.diff

24 Files Affected:

  • (modified) libc/shared/math.h (+1)
  • (added) libc/shared/math/exp2.h (+23)
  • (modified) libc/src/__support/math/CMakeLists.txt (+31)
  • (renamed) libc/src/__support/math/common_constants.h (+34-13)
  • (added) libc/src/__support/math/exp2.h (+425)
  • (modified) libc/src/math/generic/CMakeLists.txt (+14-40)
  • (removed) libc/src/math/generic/common_constants.h (-73)
  • (modified) libc/src/math/generic/exp2.cpp (+2-396)
  • (modified) libc/src/math/generic/expm1.cpp (+4-1)
  • (modified) libc/src/math/generic/expm1f.cpp (+2-1)
  • (modified) libc/src/math/generic/log.cpp (+3-1)
  • (modified) libc/src/math/generic/log10.cpp (+4-1)
  • (modified) libc/src/math/generic/log10f.cpp (+2-1)
  • (modified) libc/src/math/generic/log1p.cpp (+3-1)
  • (modified) libc/src/math/generic/log1pf.cpp (+3-1)
  • (modified) libc/src/math/generic/log2.cpp (+4-1)
  • (modified) libc/src/math/generic/log2f.cpp (+3-2)
  • (modified) libc/src/math/generic/log_range_reduction.h (+2-1)
  • (modified) libc/src/math/generic/logf.cpp (+2-1)
  • (modified) libc/src/math/generic/pow.cpp (+4-1)
  • (modified) libc/src/math/generic/powf.cpp (+5-1)
  • (modified) libc/test/shared/CMakeLists.txt (+1)
  • (modified) libc/test/shared/shared_math_test.cpp (+1)
  • (modified) utils/bazel/llvm-project-overlay/libc/BUILD.bazel (+46-39)
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 4b2a0d8c712ad..924d0cb4f3bfa 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -47,6 +47,7 @@
 #include "math/exp10f16.h"
 #include "math/exp10m1f.h"
 #include "math/exp10m1f16.h"
+#include "math/exp2.h"
 #include "math/expf.h"
 #include "math/expf16.h"
 #include "math/frexpf.h"
diff --git a/libc/shared/math/exp2.h b/libc/shared/math/exp2.h
new file mode 100644
index 0000000000000..6f1e143376a8a
--- /dev/null
+++ b/libc/shared/math/exp2.h
@@ -0,0 +1,23 @@
+//===-- Shared exp2 function ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXP2_H
+#define LLVM_LIBC_SHARED_MATH_EXP2_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/exp2.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::exp2;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_EXP2_H
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 98f9bb42f91f4..4130fdf02078b 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -373,6 +373,15 @@ add_header_library(
     libc.src.__support.macros.optimization
 )
 
+add_header_library(
+  common_constants
+  HDRS
+    common_constants.h
+  DEPENDS
+    libc.src.__support.macros.config
+    libc.src.__support.number_pair
+)
+
 add_header_library(
   cos
   HDRS
@@ -704,6 +713,28 @@ add_header_library(
     libc.src.__support.macros.optimization
 )
 
+add_header_library(
+  exp2
+  HDRS
+    exp2.h
+  DEPENDS
+    .common_constants
+    .exp_utils
+    libc.src.__support.CPP.bit
+    libc.src.__support.CPP.optional
+    libc.src.__support.FPUtil.dyadic_float
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.nearest_integer
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.FPUtil.triple_double
+    libc.src.__support.integer_literals
+    libc.src.__support.macros.optimization
+    libc.src.errno.errno
+)
+
 add_header_library(
   exp10
   HDRS
diff --git a/libc/src/math/generic/common_constants.cpp b/libc/src/__support/math/common_constants.h
similarity index 95%
rename from libc/src/math/generic/common_constants.cpp
rename to libc/src/__support/math/common_constants.h
index 2a15df243b5a4..53abbfeef3412 100644
--- a/libc/src/math/generic/common_constants.cpp
+++ b/libc/src/__support/math/common_constants.h
@@ -6,12 +6,29 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "common_constants.h"
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_COMMON_CONSTANTS_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_COMMON_CONSTANTS_H
+
 #include "src/__support/macros/config.h"
 #include "src/__support/number_pair.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
+namespace common_constants_internal {
+
+// log(2) generated by Sollya with:
+// > a = 2^-43 * nearestint(2^43*log(2));
+// LSB = 2^-43 is chosen so that e_x * LOG_2_HI is exact for -1075 < e_x < 1024.
+static constexpr double LOG_2_HI = 0x1.62e42fefa38p-1; // LSB = 2^-43
+// > b = round(log10(2) - a, D, RN);
+static constexpr double LOG_2_LO = 0x1.ef35793c7673p-45; // LSB = 2^-97
+
+// Minimax polynomial for (log(1 + x) - x)/x^2, generated by sollya with:
+// > P = fpminimax((log(1 + x) - x)/x^2, 5, [|D...|], [-2^-8, 2^-7]);
+constexpr double LOG_COEFFS[6] = {-0x1.fffffffffffffp-2, 0x1.5555555554a9bp-2,
+                                  -0x1.0000000094567p-2, 0x1.99999dcc9823cp-3,
+                                  -0x1.55550ac2e537ap-3, 0x1.21a02c4e624d7p-3};
+
 // Range reduction constants for logarithms.
 // r(0) = 1, r(127) = 0.5
 // r(k) = 2^-8 * ceil(2^8 * (1 - 2^-8) / (1 + k*2^-7))
@@ -19,7 +36,7 @@ namespace LIBC_NAMESPACE_DECL {
 // precision, and -2^-8 <= v < 2^-7.
 // TODO(lntue): Add reference to how the constants are derived after the
 // resulting paper is ready.
-alignas(8) const float R[128] = {
+alignas(8) static constexpr float R[128] = {
     0x1p0,     0x1.fcp-1, 0x1.f8p-1, 0x1.f4p-1, 0x1.fp-1,  0x1.ecp-1, 0x1.e8p-1,
     0x1.e4p-1, 0x1.ep-1,  0x1.dep-1, 0x1.dap-1, 0x1.d6p-1, 0x1.d4p-1, 0x1.dp-1,
     0x1.ccp-1, 0x1.cap-1, 0x1.c6p-1, 0x1.c4p-1, 0x1.cp-1,  0x1.bep-1, 0x1.bap-1,
@@ -40,7 +57,7 @@ alignas(8) const float R[128] = {
     0x1.0ap-1, 0x1.08p-1, 0x1.08p-1, 0x1.06p-1, 0x1.06p-1, 0x1.04p-1, 0x1.04p-1,
     0x1.02p-1, 0x1.0p-1};
 
-const double RD[128] = {
+static constexpr double RD[128] = {
     0x1p0,     0x1.fcp-1, 0x1.f8p-1, 0x1.f4p-1, 0x1.fp-1,  0x1.ecp-1, 0x1.e8p-1,
     0x1.e4p-1, 0x1.ep-1,  0x1.dep-1, 0x1.dap-1, 0x1.d6p-1, 0x1.d4p-1, 0x1.dp-1,
     0x1.ccp-1, 0x1.cap-1, 0x1.c6p-1, 0x1.c4p-1, 0x1.cp-1,  0x1.bep-1, 0x1.bap-1,
@@ -65,7 +82,7 @@ const double RD[128] = {
 // available.
 // Generated by Sollya with the formula: CD[i] = RD[i]*(1 + i*2^-7) - 1
 // for RD[i] defined on the table above.
-const double CD[128] = {
+static constexpr double CD[128] = {
     0.0,        -0x1p-14,   -0x1p-12,   -0x1.2p-11,  -0x1p-10,   -0x1.9p-10,
     -0x1.2p-9,  -0x1.88p-9, -0x1p-8,    -0x1.9p-11,  -0x1.fp-10, -0x1.9cp-9,
     -0x1p-12,   -0x1.cp-10, -0x1.bp-9,  -0x1.5p-11,  -0x1.4p-9,  0x1p-14,
@@ -90,7 +107,7 @@ const double CD[128] = {
     -0x1p-14,   -0x1p-8,
 };
 
-const double LOG_R[128] = {
+static constexpr double LOG_R[128] = {
     0x0.0000000000000p0,  0x1.010157588de71p-7, 0x1.0205658935847p-6,
     0x1.8492528c8cabfp-6, 0x1.0415d89e74444p-5, 0x1.466aed42de3eap-5,
     0x1.894aa149fb343p-5, 0x1.ccb73cdddb2ccp-5, 0x1.08598b59e3a07p-4,
@@ -135,7 +152,7 @@ const double LOG_R[128] = {
     0x1.5707a26bb8c66p-1, 0x1.5af405c3649ep-1,  0x1.5af405c3649ep-1,
     0x1.5ee82aa24192p-1,  0x0.000000000000p0};
 
-const double LOG2_R[128] = {
+static constexpr double LOG2_R[128] = {
     0x0.0000000000000p+0, 0x1.72c7ba20f7327p-7, 0x1.743ee861f3556p-6,
     0x1.184b8e4c56af8p-5, 0x1.77394c9d958d5p-5, 0x1.d6ebd1f1febfep-5,
     0x1.1bb32a600549dp-4, 0x1.4c560fe68af88p-4, 0x1.7d60496cfbb4cp-4,
@@ -188,7 +205,7 @@ const double LOG2_R[128] = {
 //     print("{", -c, ",", -b, "},");
 //   };
 // We replace LOG_R[0] with log10(1.0) == 0.0
-alignas(16) const NumberPair<double> LOG_R_DD[128] = {
+alignas(16) static constexpr NumberPair<double> LOG_R_DD[128] = {
     {0.0, 0.0},
     {-0x1.0c76b999d2be8p-46, 0x1.010157589p-7},
     {-0x1.3dc5b06e2f7d2p-45, 0x1.0205658938p-6},
@@ -324,7 +341,7 @@ alignas(16) const NumberPair<double> LOG_R_DD[128] = {
 // Output range:
 //   [-0x1.3ffcp-15, 0x1.3e3dp-15]
 // We store S2[i] = 2^16 (r(i - 2^6) - 1).
-alignas(8) const int S2[193] = {
+alignas(8) static constexpr int S2[193] = {
     0x101,  0xfd,   0xf9,   0xf5,   0xf1,   0xed,   0xe9,   0xe5,   0xe1,
     0xdd,   0xd9,   0xd5,   0xd1,   0xcd,   0xc9,   0xc5,   0xc1,   0xbd,
     0xb9,   0xb4,   0xb0,   0xac,   0xa8,   0xa4,   0xa0,   0x9c,   0x98,
@@ -348,7 +365,7 @@ alignas(8) const int S2[193] = {
     -0x1cd, -0x1d1, -0x1d5, -0x1d9, -0x1dd, -0x1e0, -0x1e4, -0x1e8, -0x1ec,
     -0x1f0, -0x1f4, -0x1f8, -0x1fc};
 
-const double R2[193] = {
+static constexpr double R2[193] = {
     0x1.0101p0,  0x1.00fdp0,  0x1.00f9p0,  0x1.00f5p0,  0x1.00f1p0,
     0x1.00edp0,  0x1.00e9p0,  0x1.00e5p0,  0x1.00e1p0,  0x1.00ddp0,
     0x1.00d9p0,  0x1.00d5p0,  0x1.00d1p0,  0x1.00cdp0,  0x1.00c9p0,
@@ -395,7 +412,7 @@ const double R2[193] = {
 // Output range:
 //   [-0x1.01928p-22 , 0x1p-22]
 // We store S[i] = 2^21 (r(i - 80) - 1).
-alignas(8) const int S3[161] = {
+alignas(8) static constexpr int S3[161] = {
     0x50,  0x4f,  0x4e,  0x4d,  0x4c,  0x4b,  0x4a,  0x49,  0x48,  0x47,  0x46,
     0x45,  0x44,  0x43,  0x42,  0x41,  0x40,  0x3f,  0x3e,  0x3d,  0x3c,  0x3b,
     0x3a,  0x39,  0x38,  0x37,  0x36,  0x35,  0x34,  0x33,  0x32,  0x31,  0x30,
@@ -418,7 +435,7 @@ alignas(8) const int S3[161] = {
 // Output range:
 //   [-0x1.0002143p-29 , 0x1p-29]
 // We store S[i] = 2^28 (r(i - 65) - 1).
-alignas(8) const int S4[130] = {
+alignas(8) static constexpr int S4[130] = {
     0x41,  0x40,  0x3f,  0x3e,  0x3d,  0x3c,  0x3b,  0x3a,  0x39,  0x38,  0x37,
     0x36,  0x35,  0x34,  0x33,  0x32,  0x31,  0x30,  0x2f,  0x2e,  0x2d,  0x2c,
     0x2b,  0x2a,  0x29,  0x28,  0x27,  0x26,  0x25,  0x24,  0x23,  0x22,  0x21,
@@ -439,7 +456,7 @@ alignas(8) const int S4[130] = {
 // Table is generated with Sollya as follow:
 // > display = hexadecimal;
 // > for i from -104 to 89 do { D(exp(i)); };
-const double EXP_M1[195] = {
+static constexpr double EXP_M1[195] = {
     0x1.f1e6b68529e33p-151, 0x1.525be4e4e601dp-149, 0x1.cbe0a45f75eb1p-148,
     0x1.3884e838aea68p-146, 0x1.a8c1f14e2af5dp-145, 0x1.20a717e64a9bdp-143,
     0x1.8851d84118908p-142, 0x1.0a9bdfb02d240p-140, 0x1.6a5bea046b42ep-139,
@@ -511,7 +528,7 @@ const double EXP_M1[195] = {
 // Table is generated with Sollya as follow:
 // > display = hexadecimal;
 // > for i from 0 to 127 do { D(exp(i / 128)); };
-const double EXP_M2[128] = {
+static constexpr double EXP_M2[128] = {
     0x1.0000000000000p0, 0x1.0202015600446p0, 0x1.04080ab55de39p0,
     0x1.06122436410ddp0, 0x1.08205601127edp0, 0x1.0a32a84e9c1f6p0,
     0x1.0c49236829e8cp0, 0x1.0e63cfa7ab09dp0, 0x1.1082b577d34edp0,
@@ -557,4 +574,8 @@ const double EXP_M2[128] = {
     0x1.568bb722dd593p1, 0x1.593b7d72305bbp1,
 };
 
+} // namespace common_constants_internal
+
 } // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_COMMON_CONSTANTS_H
diff --git a/libc/src/__support/math/exp2.h b/libc/src/__support/math/exp2.h
new file mode 100644
index 0000000000000..7eaa465f93841
--- /dev/null
+++ b/libc/src/__support/math/exp2.h
@@ -0,0 +1,425 @@
+//===-- Implementation header for exp2 --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP2_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP2_H
+
+#include "common_constants.h" // Lookup tables EXP2_MID1 and EXP_M2.
+#include "exp_constants.h"
+#include "exp_utils.h" // ziv_test_denorm.
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/optional.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/FPUtil/triple_double.h"
+#include "src/__support/common.h"
+#include "src/__support/integer_literals.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace exp2_internal {
+
+using namespace common_constants_internal;
+
+using fputil::DoubleDouble;
+using fputil::TripleDouble;
+using Float128 = typename fputil::DyadicFloat<128>;
+
+using LIBC_NAMESPACE::operator""_u128;
+
+// Error bounds:
+// Errors when using double precision.
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+constexpr double ERR_D = 0x1.0p-63;
+#else
+constexpr double ERR_D = 0x1.8p-63;
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+// Errors when using double-double precision.
+constexpr double ERR_DD = 0x1.0p-100;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+// Polynomial approximations with double precision.  Generated by Sollya with:
+// > P = fpminimax((2^x - 1)/x, 3, [|D...|], [-2^-13 - 2^-30, 2^-13 + 2^-30]);
+// > P;
+// Error bounds:
+//   | output - (2^dx - 1) / dx | < 1.5 * 2^-52.
+LIBC_INLINE static double poly_approx_d(double dx) {
+  // dx^2
+  double dx2 = dx * dx;
+  double c0 =
+      fputil::multiply_add(dx, 0x1.ebfbdff82c58ep-3, 0x1.62e42fefa39efp-1);
+  double c1 =
+      fputil::multiply_add(dx, 0x1.3b2aba7a95a89p-7, 0x1.c6b08e8fc0c0ep-5);
+  double p = fputil::multiply_add(dx2, c1, c0);
+  return p;
+}
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+// Polynomial approximation with double-double precision.  Generated by Solya
+// with:
+// > P = fpminimax((2^x - 1)/x, 5, [|DD...|], [-2^-13 - 2^-30, 2^-13 + 2^-30]);
+// Error bounds:
+//   | output - 2^(dx) | < 2^-101
+LIBC_INLINE static constexpr DoubleDouble
+poly_approx_dd(const DoubleDouble &dx) {
+  // Taylor polynomial.
+  constexpr DoubleDouble COEFFS[] = {
+      {0, 0x1p0},
+      {0x1.abc9e3b39824p-56, 0x1.62e42fefa39efp-1},
+      {-0x1.5e43a53e4527bp-57, 0x1.ebfbdff82c58fp-3},
+      {-0x1.d37963a9444eep-59, 0x1.c6b08d704a0cp-5},
+      {0x1.4eda1a81133dap-62, 0x1.3b2ab6fba4e77p-7},
+      {-0x1.c53fd1ba85d14p-64, 0x1.5d87fe7a265a5p-10},
+      {0x1.d89250b013eb8p-70, 0x1.430912f86cb8ep-13},
+  };
+
+  DoubleDouble p = fputil::polyeval(dx, COEFFS[0], COEFFS[1], COEFFS[2],
+                                    COEFFS[3], COEFFS[4], COEFFS[5], COEFFS[6]);
+  return p;
+}
+
+// Polynomial approximation with 128-bit precision:
+// Return exp(dx) ~ 1 + a0 * dx + a1 * dx^2 + ... + a6 * dx^7
+// For |dx| < 2^-13 + 2^-30:
+//   | output - exp(dx) | < 2^-126.
+LIBC_INLINE static constexpr Float128 poly_approx_f128(const Float128 &dx) {
+  constexpr Float128 COEFFS_128[]{
+      {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
+      {Sign::POS, -128, 0xb17217f7'd1cf79ab'c9e3b398'03f2f6af_u128},
+      {Sign::POS, -128, 0x3d7f7bff'058b1d50'de2d60dd'9c9a1d9f_u128},
+      {Sign::POS, -132, 0xe35846b8'2505fc59'9d3b15d9'e7fb6897_u128},
+      {Sign::POS, -134, 0x9d955b7d'd273b94e'184462f6'bcd2b9e7_u128},
+      {Sign::POS, -137, 0xaec3ff3c'53398883'39ea1bb9'64c51a89_u128},
+      {Sign::POS, -138, 0x2861225f'345c396a'842c5341'8fa8ae61_u128},
+      {Sign::POS, -144, 0xffe5fe2d'109a319d'7abeb5ab'd5ad2079_u128},
+  };
+
+  Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], COEFFS_128[2],
+                                COEFFS_128[3], COEFFS_128[4], COEFFS_128[5],
+                                COEFFS_128[6], COEFFS_128[7]);
+  return p;
+}
+
+// Compute 2^(x) using 128-bit precision.
+// TODO(lntue): investigate triple-double precision implementation for this
+// step.
+LIBC_INLINE static constexpr Float128 exp2_f128(double x, int hi, int idx1,
+                                                int idx2) {
+  Float128 dx = Float128(x);
+
+  // TODO: Skip recalculating exp_mid1 and exp_mid2.
+  Float128 exp_mid1 =
+      fputil::quick_add(Float128(EXP2_MID1[idx1].hi),
+                        fputil::quick_add(Float128(EXP2_MID1[idx1].mid),
+                                          Float128(EXP2_MID1[idx1].lo)));
+
+  Float128 exp_mid2 =
+      fputil::quick_add(Float128(EXP2_MID2[idx2].hi),
+                        fputil::quick_add(Float128(EXP2_MID2[idx2].mid),
+                                          Float128(EXP2_MID2[idx2].lo)));
+
+  Float128 exp_mid = fputil::quick_mul(exp_mid1, exp_mid2);
+
+  Float128 p = poly_approx_f128(dx);
+
+  Float128 r = fputil::quick_mul(exp_mid, p);
+
+  r.exponent += hi;
+
+  return r;
+}
+
+// Compute 2^x with double-double precision.
+LIBC_INLINE static DoubleDouble
+exp2_double_double(double x, const DoubleDouble &exp_mid) {
+  DoubleDouble dx({0, x});
+
+  // Degree-6 polynomial approximation in double-double precision.
+  // | p - 2^x | < 2^-103.
+  DoubleDouble p = poly_approx_dd(dx);
+
+  // Error bounds: 2^-102.
+  DoubleDouble r = fputil::quick_mult(exp_mid, p);
+
+  return r;
+}
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+// When output is denormal.
+LIBC_INLINE static double exp2_denorm(double x) {
+  // Range reduction.
+  int k =
+      static_cast<int>(cpp::bit_cast<uint64_t>(x + 0x1.8000'0000'4p21) >> 19);
+  double kd = static_cast<double>(k);
+
+  uint32_t idx1 = (k >> 6) & 0x3f;
+  uint32_t idx2 = k & 0x3f;
+
+  int hi = k >> 12;
+
+  DoubleDouble exp_mid1{EXP2_MID1[idx1].mid, EXP2_MID1[idx1].hi};
+  DoubleDouble exp_mid2{EXP2_MID2[idx2].mid, EXP2_MID2[idx2].hi};
+  DoubleDouble exp_mid = fputil::quick_mult(exp_mid1, exp_mid2);
+
+  // |dx| < 2^-13 + 2^-30.
+  double dx = fputil::multiply_add(kd, -0x1.0p-12, x); // exact
+
+  double mid_lo = dx * exp_mid.hi;
+
+  // Approximate (2^dx - 1)/dx ~ 1 + a0*dx + a1*dx^2 + a2*dx^3 + a3*dx^4.
+  double p = poly_approx_d(dx);
+
+  double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D)
+      .value();
+#else
+  if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D);
+      LIBC_LIKELY(r.has_value()))
+    return r.value();
+
+  // Use double-double
+  DoubleDouble r_dd = exp2_double_double(dx, exp_mid);
+
+  if (auto r = ziv_test_denorm(hi, r_dd.hi, r_dd.lo, ERR_DD);
+      LIBC_LIKELY(r.has_value()))
+    return r.value();
+
+  // Use 128-bit precision
+  Float128 r_f128 = exp2_f128(dx, hi, idx1, idx2);
+
+  return static_cast<double>(r_f128);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+}
+
+// Check for exceptional cases when:
+//  * log2(1 - 2^-54) < x < log2(1 + 2^-53)
+//  * x >= 1024
+//  * x <= -1022
+//  * x is inf or nan
+LIBC_INLINE static constexpr double set_exceptional(double x) {
+  using FPBits = typename fputil::FPBits<double>;
+  FPBits xbits(x);
+
+  uint64_t x_u = xbits.uintval();
+  uint64_t x_abs = xbits.abs().uintval();
+
+  // |x| < log2(1 + 2^-53)
+  if (x_abs <= 0x3ca71547652b82fd) {
+    // 2^(x) ~ 1 + x/2
+    return fputil::multiply_add(x, 0.5, 1.0);
+  }
+
+  // x <= -1022 || x >= 1024 or inf/nan.
+  if (x_u > 0xc08ff00000000000) {
+    // x <= -1075 or -inf/nan
+    if (x_u >= 0xc090cc0000000000) {
+      // exp(-Inf) = 0
+      if (xbits.is_inf())
+        return 0.0;
+
+      // exp(nan) = nan
+      if (xbits.is_nan())
+        return x;
+
+      if (fputil::quick_get_round() == FE_UPWARD)
+        return FPBits::min_subnormal().get_val();
+      fputil::set_errno_if_required(ERANGE);
+      fputil::raise_except_if_required(FE_UNDERFLOW);
+      return 0.0;
+    }
+
+    return exp2_denorm(x);
+  }
+
+  // x >= 1024 or +inf/nan
+  // x is finite
+  if (x_u < 0x7ff0'0000'0000'0000ULL) {
+    int rounding = fputil::quick_get_round();
+    if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
+      return FPBits::max_normal().get_val();
+
+    fputil::set_errno_if_required(ERANGE);
+    fputil::raise_except_if_required(FE_OVERFLOW);
+  }
+  // x is +inf or nan
+  return x + FPBits::inf().get_val();
+}
+
+} // namespace exp2_internal
+
+LIBC_INLINE static constexpr double exp2(double x) {
+  using namespace exp2_internal;
+  using FPBits = typename fputil::FPBits<double>;
+  FPBits xbits(x);
+
+  uint64_t x_u = xbits.uintval();
+
+  // x < -1022 or x >= 1024 or log2(1 - 2^-54) < x < log2(1 + 2^-53).
+  if (LIBC_UNLIKELY(x_u > 0xc08ff00000000000 ||
+                    (x_u <= 0xbc971547652b82fe && x_u >= 0x4090000000000000) ||
+                    x_u <= 0x3ca71547652b82fd)) {
+    return set_exceptional(x);
+  }
+
+  // Now -1075 < x <= log2(1 - 2^-54) or log2(1 + 2^-53) < x < 1024
+
+  // Range reduction:
+  // Let x = (hi + mid1 + mid2) + lo
+  // in which:
+  //   hi is an integer
+  //   mid1 * 2^6 is an integer
+  //   mid2 * 2^12 is an integer
+  // then:
+  //   2^(x) = 2^hi * 2^(mid1) * 2^(mid2) * 2^(lo).
+  // With this formula:
+  //   - multiplying by 2^hi is exact and cheap, simply by adding the exponent
+  //     field.
+  //   - 2^(mid1) and 2^(mid2) are stored in 2 x 64-element tables.
+  //   - 2^(lo) ~ 1 + a0*lo + a1 * lo^2 + ...
+  //
+  // We compute (hi + mid1 + mid2) together by perform the rounding on x * 2^12.
+  // Since |x| < |-1075)| < 2^11,
+  //   |x * 2^12| < 2^11 * 2^12 < 2^23,
+  // So we can fit the rounded result round(x * 2^12) in int32_t.
+  // Thus, the goal is to be able to use an additional addition and fixed width
+  // shift to get an int32_t representing round(x * 2^12).
+  //...
[truncated]

Copy link
Contributor Author

bassiounix commented Sep 29, 2025

This stack of pull requests is managed by Graphite. Learn more about stacking.

@bassiounix bassiounix force-pushed the users/bassiounix/spr/09-30-_libc_math_refactor_exp2_implementation_to_header-only_in_src___support_math_folder branch from df22dd9 to 656cc35 Compare September 29, 2025 23:48
@bassiounix bassiounix force-pushed the users/bassiounix/spr/09-30-_libc_math_refactor_exp2_implementation_to_header-only_in_src___support_math_folder branch from 656cc35 to e333d5a Compare September 29, 2025 23:52
@bassiounix bassiounix force-pushed the users/bassiounix/spr/09-29-_libc_math_refactor_exp10m1f16_implementation_to_header-only_in_src___support_math_folder branch 2 times, most recently from 4843827 to 7b6d71c Compare October 1, 2025 08:15
Base automatically changed from users/bassiounix/spr/09-29-_libc_math_refactor_exp10m1f16_implementation_to_header-only_in_src___support_math_folder to main October 1, 2025 08:21
@bassiounix bassiounix force-pushed the users/bassiounix/spr/09-30-_libc_math_refactor_exp2_implementation_to_header-only_in_src___support_math_folder branch from e333d5a to a4cf62e Compare October 1, 2025 08:24
@bassiounix bassiounix force-pushed the users/bassiounix/spr/09-30-_libc_math_refactor_exp2_implementation_to_header-only_in_src___support_math_folder branch from a4cf62e to d06e697 Compare October 5, 2025 03:10
@bassiounix bassiounix force-pushed the users/bassiounix/spr/09-30-_libc_math_refactor_exp2_implementation_to_header-only_in_src___support_math_folder branch from d06e697 to d2bbd3b Compare October 5, 2025 03:25
Copy link
Contributor Author

bassiounix commented Oct 5, 2025

Merge activity

  • Oct 5, 3:26 AM UTC: Graphite rebased this pull request as part of a merge.
  • Oct 5, 3:31 AM UTC: @bassiounix merged this pull request with Graphite.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bazel "Peripheral" support tier build system: utils/bazel libc
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants