Skip to content

Conversation

KavinTheG
Copy link
Contributor

@KavinTheG KavinTheG commented Sep 24, 2025

Fixes #160111

Copy link

github-actions bot commented Sep 24, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@RKSimon RKSimon self-requested a review September 24, 2025 19:23
@RKSimon RKSimon changed the title from "[DRAFT] Added hanlding for AVX512DQ+AVX512VL+AVX512F to lowerFPToIntToFP" to "[DRAFT] Added handling for AVX512DQ+AVX512VL+AVX512F to lowerFPToIntToFP" Sep 25, 2025
@KavinTheG KavinTheG marked this pull request as ready for review September 26, 2025 14:33
@llvmbot
Copy link
Member

llvmbot commented Sep 26, 2025

@llvm/pr-subscribers-backend-x86

Author: Kavin Gnanapandithan (KavinTheG)

Changes

Progress on issue llvm#160111


Full diff: https://github.com/llvm/llvm-project/pull/160582.diff

1 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+39-10)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2feb76e0eb7b4..b98f190f6915b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19897,21 +19897,50 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
   // See if we have 128-bit vector cast instructions for this type of cast.
   // We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd.
   if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
-      IntVT != MVT::i32)
+      (IntVT != MVT::i32 && IntVT != MVT::i64))
     return SDValue();
 
   unsigned SrcSize = SrcVT.getSizeInBits();
   unsigned IntSize = IntVT.getSizeInBits();
   unsigned VTSize = VT.getSizeInBits();
-  MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize);
-  MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize);
-  MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize);
-
-  // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
-  unsigned ToIntOpcode =
-      SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
-  unsigned ToFPOpcode =
-      IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+  unsigned ToIntOpcode, ToFPOpcode;
+  unsigned Width = 128;
+  bool IsUnsigned = CastToInt.getOpcode() == ISD::FP_TO_UINT;
+
+  if (Subtarget.hasAVX512()) {
+    if (Subtarget.hasVLX())  {
+      // AVX512VL could handle for FP_TO_UINT/UINT_TO_FP (f64/32 <-> i32) AVX512F as well but Width = 512
+      if (IntVT == MVT::i32) {
+        ToIntOpcode = IsUnsigned ? X86ISD::CVTTP2UI : X86ISD::CVTTP2SI;  
+        ToFPOpcode = IsUnsigned ? X86ISD::CVTUI2P : X86ISD::CVTSI2P;  
+      } else {
+        ToIntOpcode = CastToInt.getOpcode();
+        ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
+      } 
+    } else if (Subtarget.hasDQI() && IntVT == MVT::i64) {
+      // AVX512DQ + AVX512VL could handle f64/32 <-> i64 SINT & UINT
+      ToIntOpcode = CastToInt.getOpcode(); 
+      ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
+    } else {
+      // AVX512F
+      Width = 512;
+      ToIntOpcode = CastToInt.getOpcode(); 
+      ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
+    }
+  } else {
+    if (IntVT != MVT::i32 || IsUnsigned)
+      return SDValue();
+    // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
+    ToIntOpcode =
+        SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+    ToFPOpcode =
+        IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+  }
+
+  MVT VecSrcVT = MVT::getVectorVT(SrcVT, Width / SrcSize);
+  MVT VecIntVT = MVT::getVectorVT(IntVT, Width / IntSize);
+  MVT VecVT = MVT::getVectorVT(VT, Width / VTSize);
+
 
   // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
   //

@RKSimon RKSimon marked this pull request as draft September 29, 2025 10:15
   // We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd.
   if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
-      IntVT != MVT::i32)
+      (IntVT != MVT::i32 && IntVT != MVT::i64))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You'd be better off moving the hasDQI logic in here to simplify below:

!(IntVT == MVT::i32 || (IntVT == MVT::i64 && Subtarget.hasDQI())))

@KavinTheG KavinTheG closed this Oct 9, 2025
@KavinTheG KavinTheG deleted the x86-lowerFPToIntToFP-extend branch October 9, 2025 14:37
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[X86] lowerFPToIntToFP - handle UI2FP on AVX512VL targets and i64 types on AVX512DQ targets

3 participants