-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[DRAFT] Added handling for AVX512DQ+AVX512VL+AVX512F to lowerFPToIntToFP #160582
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
@llvm/pr-subscribers-backend-x86 Author: Kavin Gnanapandithan (KavinTheG) Changes: Progress on issue llvm#160111. Full diff: https://github.com/llvm/llvm-project/pull/160582.diff 1 File Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2feb76e0eb7b4..b98f190f6915b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19897,21 +19897,50 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
// See if we have 128-bit vector cast instructions for this type of cast.
// We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd.
if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
- IntVT != MVT::i32)
+ (IntVT != MVT::i32 && IntVT != MVT::i64))
return SDValue();
unsigned SrcSize = SrcVT.getSizeInBits();
unsigned IntSize = IntVT.getSizeInBits();
unsigned VTSize = VT.getSizeInBits();
- MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize);
- MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize);
- MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize);
-
- // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
- unsigned ToIntOpcode =
- SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
- unsigned ToFPOpcode =
- IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ unsigned ToIntOpcode, ToFPOpcode;
+ unsigned Width = 128;
+ bool IsUnsigned = CastToInt.getOpcode() == ISD::FP_TO_UINT;
+
+ if (Subtarget.hasAVX512()) {
+ if (Subtarget.hasVLX()) {
+ // AVX512VL could handle for FP_TO_UINT/UINT_TO_FP (f64/32 <-> i32) AVX512F as well but Width = 512
+ if (IntVT == MVT::i32) {
+ ToIntOpcode = IsUnsigned ? X86ISD::CVTTP2UI : X86ISD::CVTTP2SI;
+ ToFPOpcode = IsUnsigned ? X86ISD::CVTUI2P : X86ISD::CVTSI2P;
+ } else {
+ ToIntOpcode = CastToInt.getOpcode();
+ ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
+ }
+ } else if (Subtarget.hasDQI() && IntVT == MVT::i64) {
+ // AVX512DQ + AVX512VL could handle f64/32 <-> i64 SINT & UINT
+ ToIntOpcode = CastToInt.getOpcode();
+ ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
+ } else {
+ // AVX512F
+ Width = 512;
+ ToIntOpcode = CastToInt.getOpcode();
+ ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
+ }
+ } else {
+ if (IntVT != MVT::i32 || IsUnsigned)
+ return SDValue();
+ // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
+ ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+ ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ }
+
+ MVT VecSrcVT = MVT::getVectorVT(SrcVT, Width / SrcSize);
+ MVT VecIntVT = MVT::getVectorVT(IntVT, Width / IntSize);
+ MVT VecVT = MVT::getVectorVT(VT, Width / VTSize);
+
// sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
//
|
  // We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd.
  if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
-      IntVT != MVT::i32)
+      (IntVT != MVT::i32 && IntVT != MVT::i64))
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You'd be better off moving the hasDQI logic in here to simplify below:
!(IntVT == MVT::i32 || (IntVT == MVT::i64 && Subtarget.hasDQI())))
…vm-project into x86-lowerFPToIntToFP-extend
…vm-project into x86-lowerFPToIntToFP-extend
Fixes #160111