Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/config/linux/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -986,6 +986,7 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
libc.src.stdfix.idivulr
libc.src.stdfix.idivuk
libc.src.stdfix.idivulk
libc.src.stdfix.rdivi
)
endif()

Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1019,6 +1019,7 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
libc.src.stdfix.idivulr
libc.src.stdfix.idivuk
libc.src.stdfix.idivulk
libc.src.stdfix.rdivi
)
endif()

Expand Down
8 changes: 8 additions & 0 deletions libc/include/stdfix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -544,3 +544,11 @@ functions:
arguments:
- type: unsigned long accum
guard: LIBC_COMPILER_HAS_FIXED_POINT
- name: rdivi
standards:
- stdc_ext
return_type: fract
arguments:
- type: int
- type: int
guard: LIBC_COMPILER_HAS_FIXED_POINT
82 changes: 82 additions & 0 deletions libc/src/__support/fixed_point/fx_bits.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/limits.h" // numeric_limits
#include "src/__support/CPP/type_traits.h"
#include "src/__support/libc_assert.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
#include "src/__support/macros/null_check.h" // LIBC_CRASH_ON_VALUE
Expand Down Expand Up @@ -224,6 +225,87 @@ idiv(T x, T y) {
return static_cast<XType>(result);
}

LIBC_INLINE long accum nrstep(long accum d, long accum x0) {
auto v = x0 * (2.lk - (d * x0));
return v;
}

// Divide the two integers and return a fixed_point value
//
// For reference, see:
// https://en.wikipedia.org/wiki/Division_algorithm#Newton%E2%80%93Raphson_division
// https://stackoverflow.com/a/9231996

template <typename XType> LIBC_INLINE constexpr XType divi(int n, int d) {
// If the value of the second operand of the / operator is zero, the
// behavior is undefined. Ref: ISO/IEC TR 18037:2008(E) p.g. 16
LIBC_CRASH_ON_VALUE(d, 0);

if (LIBC_UNLIKELY(n == 0)) {
return FXRep<XType>::ZERO();
}
bool result_is_negative = ((n < 0) != (d < 0));

int64_t n64 = static_cast<int64_t>(n);
int64_t d64 = static_cast<int64_t>(d);

uint64_t nv = static_cast<uint64_t>(n64 < 0 ? -n64 : n64);
uint64_t dv = static_cast<uint64_t>(d64 < 0 ? -d64 : d64);

if (d == INT_MIN) {
nv <<= 1;
dv >>= 1;
}

uint32_t clz = cpp::countl_zero<uint32_t>(static_cast<uint32_t>(dv)) - 1;
uint64_t scaled_val = dv << clz;
// Scale denominator to be in the range of [0.5,1]
FXBits<long accum> d_scaled{scaled_val};
uint64_t scaled_val_n = nv << clz;
// Scale the numerator as much as the denominator to maintain correctness of
// the original equation
FXBits<long accum> n_scaled{scaled_val_n};
long accum n_scaled_val = n_scaled.get_val();
long accum d_scaled_val = d_scaled.get_val();
// x0 = (48/17) - (32/17) * d_n
long accum a = 0x2.d89d89d8p0lk; // 48/17 = 2.8235294...
long accum b = 0x1.e1e1e1e1p0lk; // 32/17 = 1.8823529...
// Error of the initial approximation, as derived
// from the wikipedia article is
// E0 = 1/17 = 0.059 (5.9%)
long accum initial_approx = a - (b * d_scaled_val);
// Since, 0.5 <= d_scaled_val <= 1.0, 0.9412 <= initial_approx <= 1.88235
LIBC_ASSERT((initial_approx >= 0x0.78793dd9p0lk) &&
(initial_approx <= 0x1.f0f0d845p0lk));
// Each newton-raphson iteration will square the error, due
// to quadratic convergence. So,
// E1 = (0.059)^2 = 0.0034
long accum val = nrstep(d_scaled_val, initial_approx);
// E2 = 0.0000121
val = nrstep(d_scaled_val, val);
// E3 = 1.468e−10
val = nrstep(d_scaled_val, val);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can skip further Newton-Raphson steps for smaller types:

if constexpr (FXRep<XType>::FRACTION_LENGTH > 8) {
  val = nrstep(...);
  if constexpr(FXRep<XType>::FRACTION_LENGTH > 16)
    val = nrstep(...);
}

Copy link
Contributor Author

@bojle bojle Sep 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing this, I end up losing precision for finite result tests. For example, 128/256 results in 0.499969. With 3 iterations, the result is precisely 0.5. If the results, even for finite tests is acceptable to be +- epsilon, I can re-write the tests to do EXPECT_LT instead of EXPECT_EQ

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Theoretically, to get correct rounding for division, we will need twice the precision. So with 32-bit inputs, we will need 64-bit precision before rounding to the targeted format. In here, we are actually operating on a bit more than 32-bit precision, so there will be results that are not correctly rounded.

Nonetheless, dividing a power of 2 not correctly might be a surprise to people, so maybe we can add a special branch for the case d is a power of 2. WDYT?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is a performance tradeoff, having performance numbers would be a
good idea. The difference is 3 inline function calls back to back vs a for
loop with a bound only known at runtime. The former has a clear dataflow with
no branches (pipeline friendly?), and latter has a cmp and branch with unknown
loop count. We could measure both, then make this decision?

Copy link
Contributor Author

@bojle bojle Oct 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made these mods to the code:

  long accum val = nrstep(d_scaled_val, initial_approx);
  if (isPowerOfTwo(d)) {
    // E2 = 0.0000121
    val = nrstep(d_scaled_val, val);
    // E3 = 1.468e−10
    val = nrstep(d_scaled_val, val);
  }

and set up a naive time measurement script.

The results:

With contiguos 3 iterations (dividing 76/128 for 10k iterations): 650us
With if statements for po2 (76/128 for 10k iterations): 526us

So, I think I should add checks for po2.

long accum res = n_scaled_val * val;

if (result_is_negative) {
res *= static_cast<long accum>(-1);
}

// Check for overflow before returning
long accum max_val = static_cast<long accum>(FXRep<XType>::MAX());
long accum min_val = static_cast<long accum>(FXRep<XType>::MIN());

// Per clause 7.18a.6.1, saturate values on overflow
if (res > max_val) {
return FXRep<XType>::MAX();
} else if (res < min_val) {
return FXRep<XType>::MIN();
} else {
return static_cast<XType>(res);
}
}

} // namespace fixed_point
} // namespace LIBC_NAMESPACE_DECL

Expand Down
14 changes: 14 additions & 0 deletions libc/src/stdfix/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,20 @@ foreach(suffix IN ITEMS r lr k lk ur ulr uk ulk)
)
endforeach()

foreach(suffix IN ITEMS r)
add_entrypoint_object(
${suffix}divi
HDRS
${suffix}divi.h
SRCS
${suffix}divi.cpp
COMPILE_OPTIONS
${libc_opt_high_flag}
DEPENDS
libc.src.__support.fixed_point.fx_bits
)
endforeach()

add_entrypoint_object(
uhksqrtus
HDRS
Expand Down
21 changes: 21 additions & 0 deletions libc/src/stdfix/rdivi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-- Implementation of rdivi function ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "rdivi.h"
#include "include/llvm-libc-macros/stdfix-macros.h" // fract
#include "src/__support/common.h" // LLVM_LIBC_FUNCTION
#include "src/__support/fixed_point/fx_bits.h" // fixed_point
#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(fract, rdivi, (int a, int b)) {
return fixed_point::divi<fract>(a, b);
}

} // namespace LIBC_NAMESPACE_DECL
21 changes: 21 additions & 0 deletions libc/src/stdfix/rdivi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-- Implementation header for rdivi ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STDFIX_RDIVI_H
#define LLVM_LIBC_SRC_STDFIX_RDIVI_H

#include "include/llvm-libc-macros/stdfix-macros.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

fract rdivi(int a, int b);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_STDFIX_RDIVI_H
16 changes: 16 additions & 0 deletions libc/test/src/stdfix/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,22 @@ foreach(suffix IN ITEMS r lr k lk ur ulr uk ulk)
)
endforeach()

foreach(suffix IN ITEMS r)
add_libc_test(
${suffix}divi_test
SUITE
libc-stdfix-tests
HDRS
DivITest.h
SRCS
${suffix}divi_test.cpp
DEPENDS
libc.src.stdfix.${suffix}divi
libc.src.__support.fixed_point.fx_bits
libc.hdr.signal_macros
)
endforeach()

add_libc_test(
uhksqrtus_test
SUITE
Expand Down
74 changes: 74 additions & 0 deletions libc/test/src/stdfix/DivITest.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
//===-- Utility class to test fxdivi functions ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/CPP/type_traits.h"
#include "src/__support/fixed_point/fx_bits.h"
#include "src/__support/fixed_point/fx_rep.h"
#include "test/UnitTest/Test.h"

template <typename XType> XType get_epsilon() = delete;
template <> fract get_epsilon() { return FRACT_EPSILON; }
template <> unsigned fract get_epsilon() { return UFRACT_EPSILON; }
template <> long fract get_epsilon() { return LFRACT_EPSILON; }

template <typename XType>
class DivITest : public LIBC_NAMESPACE::testing::Test {
using FXRep = LIBC_NAMESPACE::fixed_point::FXRep<XType>;
using FXBits = LIBC_NAMESPACE::fixed_point::FXBits<XType>;

public:
typedef XType (*DivIFunc)(int, int);

void testBasic(DivIFunc func) {
XType epsilon = get_epsilon<XType>();
EXPECT_LT((func(2, 3) - 0.666656494140625r), epsilon);
EXPECT_LT((func(3, 4) - 0.75r), epsilon);
EXPECT_LT((func(1043, 2764) - 0.3773516643r), epsilon);
EXPECT_LT((func(60000, 720293) - 0.08329943509r), epsilon);

EXPECT_EQ(func(128, 256), 0.5r);
EXPECT_EQ(func(1, 2), 0.5r);
EXPECT_EQ(func(1, 4), 0.25r);
EXPECT_EQ(func(1, 8), 0.125r);
EXPECT_EQ(func(1, 16), 0.0625r);

EXPECT_EQ(func(-1, 2), -0.5r);
EXPECT_EQ(func(1, -4), -0.25r);
EXPECT_EQ(func(-1, 8), -0.125r);
EXPECT_EQ(func(1, -16), -0.0625r);
}

void testSpecial(DivIFunc func) {
XType epsilon = get_epsilon<XType>();
EXPECT_EQ(func(0, 10), 0.r);
EXPECT_EQ(func(0, -10), 0.r);
EXPECT_EQ(func(-(1 << FRACT_FBIT), 1 << FRACT_FBIT), FRACT_MIN);
EXPECT_EQ(func((1 << FRACT_FBIT) - 1, 1 << FRACT_FBIT), FRACT_MAX);
// From Section 7.18a.6.1, functions returning a fixed-point value, the
// return value is saturated on overflow.
EXPECT_EQ(func(INT_MAX, INT_MAX), FRACT_MAX);
EXPECT_LT(func(INT_MAX - 1, INT_MAX) - 0.99999999r, epsilon);
EXPECT_EQ(func(INT_MIN, INT_MAX), FRACT_MIN);
// Expecting 0 here as fract is not precise enough to
// handle 1/INT_MAX
EXPECT_LT(func(1, INT_MAX) - 0.r, epsilon);
// This results in 1.1739, which should be saturated to FRACT_MAX
EXPECT_EQ(func(27, 23), FRACT_MAX);

EXPECT_EQ(func(INT_MIN, 1), FRACT_MIN);
EXPECT_LT(func(1, INT_MIN) - 0.r, epsilon);

EXPECT_EQ(func(INT_MIN, INT_MIN), 1.r);
}
};

#define LIST_DIVI_TESTS(Name, XType, func) \
using LlvmLibc##Name##diviTest = DivITest<XType>; \
TEST_F(LlvmLibc##Name##diviTest, Basic) { testBasic(&func); } \
TEST_F(LlvmLibc##Name##diviTest, Special) { testSpecial(&func); } \
static_assert(true, "Require semicolon.")
14 changes: 14 additions & 0 deletions libc/test/src/stdfix/rdivi_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
//===-- Unittests for rdivi -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "DivITest.h"

#include "llvm-libc-macros/stdfix-macros.h" // fract
#include "src/stdfix/rdivi.h"

LIST_DIVI_TESTS(r, fract, LIBC_NAMESPACE::rdivi);
Loading