-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86][Clang] Allow SSE/AVX COMI/UCOMI/CMPS/CMPP fp comparison intrinsics to be used in constexpr #160876
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: mitchell (zeyi2) Changes: This PR is not ready for review yet. This patch adds constexpr evaluation support for scalar / vector comparison intrinsics. Current status:
Patch is 40.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160876.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index b80f733066b65..ea45d4806905d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -58,7 +58,7 @@ let Attributes = [NoThrow, Const, RequiredVectorWidth<64>], Features = "sse" in
}
// SSE intrinsics
-let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
+let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<128>] in {
foreach Cmp = ["eq", "lt", "le", "gt", "ge", "neq"] in {
let Features = "sse" in {
def comi#Cmp : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
@@ -3420,7 +3420,7 @@ let Features = "avx512vp2intersect,avx512vl", Attributes = [NoThrow, RequiredVec
def vp2intersect_d_128 : X86Builtin<"void(_Vector<4, int>, _Vector<4, int>, unsigned char *, unsigned char *)">;
}
-let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vcomish : X86Builtin<"int(_Vector<8, _Float16>, _Vector<8, _Float16>, _Constant int, _Constant int)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 5423d3ca73c81..99d8e036d9f2a 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -11,6 +11,7 @@
#include "Interp.h"
#include "InterpBuiltinBitCast.h"
#include "PrimType.h"
+#include "immintrin.h"
#include "clang/AST/OSLog.h"
#include "clang/AST/RecordLayout.h"
#include "clang/Basic/Builtins.h"
@@ -2948,6 +2949,122 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
return true;
}
+// Constant-evaluate __builtin_ia32_vcomish: compare the lowest _Float16
+// element of each vector operand according to the _CMP_* predicate passed
+// as argument 2, yielding 0 or 1. Argument 3 (the rounding/SAE control) is
+// popped but intentionally ignored: rounding mode cannot change the result
+// of an ordered/unordered comparison.
+static bool interp__builtin_x86_vcomish(InterpState &S, CodePtr OpPC,
+                                        const InterpFrame *Frame,
+                                        const CallExpr *Call) {
+  using CmpResult = llvm::APFloatBase::cmpResult;
+
+  llvm::APSInt R =
+      popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(3)));
+  llvm::APSInt P =
+      popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)));
+  const Pointer &VB = S.Stk.pop<Pointer>();
+  const Pointer &VA = S.Stk.pop<Pointer>();
+
+  llvm::APFloat A0 = VA.elem<Floating>(0).getAPFloat();
+  llvm::APFloat B0 = VB.elem<Floating>(0).getAPFloat();
+  CmpResult Cmp = A0.compare(B0);
+
+  // APFloat::compare returns cmpUnordered iff either operand is a NaN, so
+  // the NaN handling below falls out of the comparison result itself.
+  bool IsEq = Cmp == CmpResult::cmpEqual;
+  bool IsGt = Cmp == CmpResult::cmpGreaterThan;
+  bool IsLt = Cmp == CmpResult::cmpLessThan;
+  bool IsUnordered = Cmp == CmpResult::cmpUnordered;
+  bool Result = false;
+
+  // Use the raw VCMP predicate immediate encodings instead of the _CMP_*
+  // macros: including "immintrin.h" here would break builds on non-x86
+  // hosts.
+  switch (P.getZExtValue()) {
+  case 0x00: /* _CMP_EQ_OQ, _mm_ucomieq_sh */
+  case 0x10: /* _CMP_EQ_OS, _mm_comieq_sh */
+    Result = IsEq; // cmpEqual already implies neither operand is NaN.
+    break;
+  case 0x14: /* _CMP_NEQ_US, _mm_comineq_sh */
+  case 0x04: /* _CMP_NEQ_UQ, _mm_ucomineq_sh */
+    Result = !IsEq; // Unordered counts as "not equal".
+    break;
+  case 0x0d: /* _CMP_GE_OS, _mm_comige_sh */
+  case 0x1d: /* _CMP_GE_OQ, _mm_ucomige_sh */
+    Result = !IsLt && !IsUnordered;
+    break;
+  case 0x01: /* _CMP_LT_OS, _mm_comilt_sh */
+  case 0x11: /* _CMP_LT_OQ, _mm_ucomilt_sh */
+    Result = IsLt; // cmpLessThan already implies ordered.
+    break;
+  case 0x0e: /* _CMP_GT_OS, _mm_comigt_sh */
+  case 0x1e: /* _CMP_GT_OQ, _mm_ucomigt_sh */
+    Result = IsGt; // cmpGreaterThan already implies ordered.
+    break;
+  case 0x02: /* _CMP_LE_OS, _mm_comile_sh */
+  case 0x12: /* _CMP_LE_OQ, _mm_ucomile_sh */
+    Result = !IsGt && !IsUnordered;
+    break;
+  default:
+    // Other predicate encodings are not produced by the comi/ucomi header
+    // wrappers; reject them so evaluation fails as non-constant.
+    return false;
+  }
+
+  pushInteger(S, Result ? 1 : 0, Call->getType());
+  return true;
+}
+
+// Constant-evaluate the SSE/SSE2 scalar comparison builtins (comi* and
+// ucomi*): compare element 0 of each vector operand and push 0 or 1. The
+// ordered (comi) and unordered (ucomi) variants compute the same result;
+// they differ only in which FP exceptions the instructions raise at
+// runtime, which constant evaluation does not model.
+static bool interp__builtin_x86_compare_scalar(InterpState &S, CodePtr OpPC,
+                                               const InterpFrame *Frame,
+                                               const CallExpr *Call,
+                                               unsigned ID) {
+  using CmpResult = llvm::APFloatBase::cmpResult;
+
+  const Pointer &VB = S.Stk.pop<Pointer>();
+  const Pointer &VA = S.Stk.pop<Pointer>();
+
+  llvm::APFloat A0 = VA.elem<Floating>(0).getAPFloat();
+  llvm::APFloat B0 = VB.elem<Floating>(0).getAPFloat();
+  CmpResult Cmp = A0.compare(B0);
+
+  // APFloat::compare returns cmpUnordered iff either operand is a NaN, so
+  // the NaN handling below falls out of the comparison result itself.
+  bool IsEq = Cmp == CmpResult::cmpEqual;
+  bool IsGt = Cmp == CmpResult::cmpGreaterThan;
+  bool IsLt = Cmp == CmpResult::cmpLessThan;
+  bool IsUnordered = Cmp == CmpResult::cmpUnordered;
+  bool Result = false;
+
+  switch (ID) {
+  case X86::BI__builtin_ia32_comieq:
+  case X86::BI__builtin_ia32_ucomieq:
+  case X86::BI__builtin_ia32_comisdeq:
+  case X86::BI__builtin_ia32_ucomisdeq:
+    Result = IsEq; // cmpEqual already implies neither operand is NaN.
+    break;
+  case X86::BI__builtin_ia32_comineq:
+  case X86::BI__builtin_ia32_ucomineq:
+  case X86::BI__builtin_ia32_comisdneq:
+  case X86::BI__builtin_ia32_ucomisdneq:
+    Result = !IsEq; // Unordered counts as "not equal".
+    break;
+  case X86::BI__builtin_ia32_comige:
+  case X86::BI__builtin_ia32_ucomige:
+  case X86::BI__builtin_ia32_comisdge:
+  case X86::BI__builtin_ia32_ucomisdge:
+    Result = !IsLt && !IsUnordered;
+    break;
+  case X86::BI__builtin_ia32_comilt:
+  case X86::BI__builtin_ia32_ucomilt:
+  case X86::BI__builtin_ia32_comisdlt:
+  case X86::BI__builtin_ia32_ucomisdlt:
+    Result = IsLt; // cmpLessThan already implies ordered.
+    break;
+  case X86::BI__builtin_ia32_comigt:
+  case X86::BI__builtin_ia32_ucomigt:
+  case X86::BI__builtin_ia32_comisdgt:
+  case X86::BI__builtin_ia32_ucomisdgt:
+    Result = IsGt; // cmpGreaterThan already implies ordered.
+    break;
+  case X86::BI__builtin_ia32_comile:
+  case X86::BI__builtin_ia32_ucomile:
+  case X86::BI__builtin_ia32_comisdle:
+  case X86::BI__builtin_ia32_ucomisdle:
+    Result = !IsGt && !IsUnordered;
+    break;
+  default:
+    return false;
+  }
+  // Use the call's result type (int for all comi/ucomi builtins) for
+  // consistency with the other interp__builtin_x86_* helpers.
+  pushInteger(S, Result ? 1 : 0, Call->getType());
+  return true;
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3685,6 +3802,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_insert128i256:
return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);
+ case X86::BI__builtin_ia32_vcomish:
+ return interp__builtin_x86_vcomish(S, OpPC, Frame, Call);
+ case X86::BI__builtin_ia32_comieq:
+ case X86::BI__builtin_ia32_ucomieq:
+ case X86::BI__builtin_ia32_comisdeq:
+ case X86::BI__builtin_ia32_ucomisdeq:
+ case X86::BI__builtin_ia32_comineq:
+ case X86::BI__builtin_ia32_ucomineq:
+ case X86::BI__builtin_ia32_comisdneq:
+ case X86::BI__builtin_ia32_ucomisdneq:
+ case X86::BI__builtin_ia32_comige:
+ case X86::BI__builtin_ia32_ucomige:
+ case X86::BI__builtin_ia32_comisdge:
+ case X86::BI__builtin_ia32_ucomisdge:
+ case X86::BI__builtin_ia32_comilt:
+ case X86::BI__builtin_ia32_ucomilt:
+ case X86::BI__builtin_ia32_comisdlt:
+ case X86::BI__builtin_ia32_ucomisdlt:
+ case X86::BI__builtin_ia32_comile:
+ case X86::BI__builtin_ia32_ucomile:
+ case X86::BI__builtin_ia32_comisdle:
+ case X86::BI__builtin_ia32_ucomisdle:
+ case X86::BI__builtin_ia32_comigt:
+ case X86::BI__builtin_ia32_ucomigt:
+ case X86::BI__builtin_ia32_comisdgt:
+ case X86::BI__builtin_ia32_ucomisdgt:
+ return interp__builtin_x86_compare_scalar(S, OpPC, Frame, Call, BuiltinID);
+
default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
@@ -3721,8 +3866,8 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E,
break;
}
case OffsetOfNode::Array: {
- // When generating bytecode, we put all the index expressions as Sint64 on
- // the stack.
+ // When generating bytecode, we put all the index expressions as Sint64
+ // on the stack.
int64_t Index = ArrayIndices[ArrayIndex];
const ArrayType *AT = S.getASTContext().getAsArrayType(CurrentType);
if (!AT)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d10e2afeb2341..2eb5853036e02 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -36,6 +36,7 @@
#include "ByteCode/Frame.h"
#include "ByteCode/State.h"
#include "ExprConstShared.h"
+#include "immintrin.h"
#include "clang/AST/APValue.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTLambda.h"
@@ -11768,7 +11769,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
}
return LHS.lshr(RHS.getZExtValue());
});
-
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
case clang::X86::BI__builtin_ia32_pmuldq512:
@@ -14760,6 +14760,148 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return HandleMaskBinOp(
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
}
+ case X86::BI__builtin_ia32_vcomish: {
+ APSInt R, P;
+ if (!EvaluateInteger(E->getArg(3), R, Info))
+ return false;
+ if (!EvaluateInteger(E->getArg(2), P, Info))
+ return false;
+ APValue AV, BV;
+ if (!EvaluateVector(E->getArg(0), AV, Info) ||
+ !EvaluateVector(E->getArg(1), BV, Info))
+ return false;
+ if (!AV.isVector() || !BV.isVector() || AV.getVectorLength() == 0 ||
+ BV.getVectorLength() == 0)
+ return false;
+ const APValue &A0V = AV.getVectorElt(0);
+ const APValue &B0V = BV.getVectorElt(0);
+ if (!A0V.isFloat() || !B0V.isFloat())
+ return false;
+ const llvm::APFloat &A0 = A0V.getFloat();
+ const llvm::APFloat &B0 = B0V.getFloat();
+ auto Cmp = A0.compare(B0);
+
+ const bool IsEq = (Cmp == llvm::APFloatBase::cmpEqual);
+ const bool IsLt = (Cmp == llvm::APFloatBase::cmpLessThan);
+ const bool IsGt = (Cmp == llvm::APFloatBase::cmpGreaterThan);
+ bool Result = false;
+
+ switch (P.getExtValue()) {
+ case _CMP_EQ_OQ: /* _mm_ucomieq_sh */
+ case _CMP_EQ_OS: /* _mm_comieq_sh */
+ Result = IsEq && !A0.isNaN() && !B0.isNaN();
+ break;
+ case _CMP_NEQ_US: /* _mm_comineq_sh */
+ case _CMP_NEQ_UQ: /* _mm_ucomineq_sh */
+ Result = !IsEq || A0.isNaN() || B0.isNaN();
+ break;
+ case _CMP_GE_OS: /* _mm_comige_sh */
+ case _CMP_GE_OQ: /* _mm_ucomige_sh */
+ Result = !IsLt && !A0.isNaN() && !B0.isNaN();
+ break;
+ case _CMP_LT_OS: /* _mm_comilt_sh */
+ case _CMP_LT_OQ: /* _mm_ucomilt_sh */
+ Result = IsLt && !A0.isNaN() && !B0.isNaN();
+ break;
+ case _CMP_GT_OS: /* _mm_comigt_sh */
+ case _CMP_GT_OQ: /* _mm_ucomigt_sh */
+ Result = IsGt && !A0.isNaN() && !B0.isNaN();
+ break;
+ case _CMP_LE_OS: /* _mm_comile_sh */
+ case _CMP_LE_OQ: /*_mm_ucomile_sh */
+ Result = !IsGt && !A0.isNaN() && !B0.isNaN();
+ break;
+ default:
+ return false;
+ }
+ return Success(Result ? 1 : 0, E);
+ }
+ case X86::BI__builtin_ia32_comieq:
+ case X86::BI__builtin_ia32_ucomieq:
+ case X86::BI__builtin_ia32_comisdeq:
+ case X86::BI__builtin_ia32_ucomisdeq:
+ case X86::BI__builtin_ia32_comineq:
+ case X86::BI__builtin_ia32_ucomineq:
+ case X86::BI__builtin_ia32_comisdneq:
+ case X86::BI__builtin_ia32_ucomisdneq:
+ case X86::BI__builtin_ia32_comige:
+ case X86::BI__builtin_ia32_ucomige:
+ case X86::BI__builtin_ia32_comisdge:
+ case X86::BI__builtin_ia32_ucomisdge:
+ case X86::BI__builtin_ia32_comilt:
+ case X86::BI__builtin_ia32_ucomilt:
+ case X86::BI__builtin_ia32_comisdlt:
+ case X86::BI__builtin_ia32_ucomisdlt:
+ case X86::BI__builtin_ia32_comigt:
+ case X86::BI__builtin_ia32_ucomigt:
+ case X86::BI__builtin_ia32_comisdgt:
+ case X86::BI__builtin_ia32_ucomisdgt:
+ case X86::BI__builtin_ia32_comile:
+ case X86::BI__builtin_ia32_ucomile:
+ case X86::BI__builtin_ia32_comisdle:
+ case X86::BI__builtin_ia32_ucomisdle: {
+ APValue AV, BV;
+ if (!EvaluateVector(E->getArg(0), AV, Info) ||
+ !EvaluateVector(E->getArg(1), BV, Info))
+ return false;
+ if (!AV.isVector() || !BV.isVector() || AV.getVectorLength() == 0 ||
+ BV.getVectorLength() == 0)
+ return false;
+ const APValue &A0V = AV.getVectorElt(0);
+ const APValue &B0V = BV.getVectorElt(0);
+ if (!A0V.isFloat() || !B0V.isFloat())
+ return false;
+ const llvm::APFloat &A0 = A0V.getFloat();
+ const llvm::APFloat &B0 = B0V.getFloat();
+ auto Cmp = A0.compare(B0);
+
+ const bool IsEq = (Cmp == llvm::APFloatBase::cmpEqual);
+ const bool IsLt = (Cmp == llvm::APFloatBase::cmpLessThan);
+ const bool IsGt = (Cmp == llvm::APFloatBase::cmpGreaterThan);
+ bool Result = false;
+
+ switch (BuiltinOp) {
+ case X86::BI__builtin_ia32_comieq:
+ case X86::BI__builtin_ia32_ucomieq:
+ case X86::BI__builtin_ia32_comisdeq:
+ case X86::BI__builtin_ia32_ucomisdeq:
+ Result = IsEq && !A0.isNaN() && !B0.isNaN();
+ break;
+ case X86::BI__builtin_ia32_comineq:
+ case X86::BI__builtin_ia32_ucomineq:
+ case X86::BI__builtin_ia32_comisdneq:
+ case X86::BI__builtin_ia32_ucomisdneq:
+ Result = !IsEq || A0.isNaN() || B0.isNaN();
+ break;
+ case X86::BI__builtin_ia32_comige:
+ case X86::BI__builtin_ia32_ucomige:
+ case X86::BI__builtin_ia32_comisdge:
+ case X86::BI__builtin_ia32_ucomisdge:
+ Result = !IsLt && !A0.isNaN() && !B0.isNaN();
+ break;
+ case X86::BI__builtin_ia32_comilt:
+ case X86::BI__builtin_ia32_ucomilt:
+ case X86::BI__builtin_ia32_comisdlt:
+ case X86::BI__builtin_ia32_ucomisdlt:
+ Result = IsLt && !A0.isNaN() && !B0.isNaN();
+ break;
+ case X86::BI__builtin_ia32_comigt:
+ case X86::BI__builtin_ia32_ucomigt:
+ case X86::BI__builtin_ia32_comisdgt:
+ case X86::BI__builtin_ia32_ucomisdgt:
+ Result = IsGt && !A0.isNaN() && !B0.isNaN();
+ break;
+ case X86::BI__builtin_ia32_comile:
+ case X86::BI__builtin_ia32_ucomile:
+ case X86::BI__builtin_ia32_comisdle:
+ case X86::BI__builtin_ia32_ucomisdle:
+ Result = !IsGt && !A0.isNaN() && !B0.isNaN();
+ break;
+ default:
+ return false;
+ }
+ return Success(Result ? 1 : 0, E);
+ }
}
}
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 4bd798129a25d..f09338533e9dd 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -298,74 +298,74 @@ _mm512_zextph256_ph512(__m256h __a) {
#define _mm_comi_sh(A, B, pred) \
_mm_comi_round_sh((A), (B), (pred), _MM_FROUND_CUR_DIRECTION)
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comieq_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_EQ_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comilt_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LT_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comile_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LE_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comigt_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GT_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comige_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GE_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comineq_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_NEQ_US,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomieq_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_EQ_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomilt_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LT_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomile_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LE_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomigt_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GT_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomige_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GE_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomineq_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_NEQ_UQ,
_MM_FROUND_CUR_DIRECTION);
}
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index fca6229a065be..bfcec3c2ce905 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -999,8 +999,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comieq_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);
}
@@ -1023,8 +1023,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a,
- __m128d __b) {
+static __in...
[truncated]
|
@@ -58,7 +58,7 @@ let Attributes = [NoThrow, Const, RequiredVectorWidth<64>], Features = "sse" in | |||
} | |||
|
|||
// SSE intrinsics | |||
let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { | |||
let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<128>] in { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you will need to close this block after the cmpsd def and reopen a let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
for the vec_ext_ defs, as they aren't constexpr yet (you also need to update against trunk, as the pack defs have been moved).
You're also missing a Constexpr attribute for the cmppd256/cmpps256 defs
clang/lib/AST/ExprConstant.cpp
Outdated
@@ -36,6 +36,7 @@ | |||
#include "ByteCode/Frame.h" | |||
#include "ByteCode/State.h" | |||
#include "ExprConstShared.h" | |||
#include "immintrin.h" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
don't do this - it can fail on non-x86 builds - just use the raw encodings and add a /_CMP_EQ_OQ/ style comment
7500ccf
to
dcdbe90
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
Hi, I implemented six comparison intrinsics: __mm(256)_cmp_ss/sd/ps/pd. But I'm not quite sure whether my approach of adding helper functions is appropriate. Thanks for taking a look. |
This PR is not ready for review yet.
This patch adds constexpr evaluation support for scalar / vector comparison intrinsics.
Current status:
Current TODOs:
1. add full testcases for comi/ucomi/vcomish
2. implement _mm_comi_sh
3. implement cmp(op)-related comparison
4. implement the _mm(256)_cmp_.. intrinsics
5. Tests
Closes #160473