llvm · perlfu · Sep 1, 2025 · Sep 25, 2025 · Sep 29, 2025 · Sep 29, 2025
diff --git a/llvm/include/llvm/ADT/GenericSSAContext.h b/llvm/include/llvm/ADT/GenericSSAContext.h
@@ -54,7 +54,7 @@ template <typename _FunctionT> class GenericSSAContext {
 
   // The null value for ValueRefT. For LLVM IR and MIR, this is simply the
   // default constructed value.
-  static constexpr ValueRefT *ValueRefNull = {};
+  static constexpr ValueRefT ValueRefNull = {};
 
   // An InstructionT usually defines one or more ValueT objects.
   using InstructionT = typename SSATraits::InstructionT;
@@ -96,6 +96,10 @@ template <typename _FunctionT> class GenericSSAContext {
   static bool isConstantOrUndefValuePhi(const InstructionT &Instr);
   const BlockT *getDefBlock(ConstValueRefT value) const;
 
+  void getPhiInputs(const InstructionT &Instr,
+                    SmallVectorImpl<ConstValueRefT> &Values,
+                    SmallVectorImpl<const BlockT *> &Blocks) const;
+
   Printable print(const BlockT *block) const;
   Printable printAsOperand(const BlockT *BB) const;
   Printable print(const InstructionT *inst) const;

diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h
@@ -455,9 +455,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
   /// the worklist.
   void taintAndPushAllDefs(const BlockT &JoinBlock);
 
-  /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
-  /// the worklist.
-  void taintAndPushPhiNodes(const BlockT &JoinBlock);
+  /// \brief Mark phi nodes in \p JoinBlock as divergent and push them on
+  /// the worklist if they are divergent over the path from \p JoinBlock
+  /// to \p DivTermBlock.
+  void taintAndPushPhiNodes(const BlockT &JoinBlock, const BlockT &DivTermBlock,
+                            const DivergenceDescriptorT &DivDesc);
 
   /// \brief Identify all Instructions that become divergent because \p DivExit
   /// is a divergent cycle exit of \p DivCycle. Mark those instructions as
@@ -917,19 +919,48 @@ void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
 /// Mark divergent phi nodes in a join block
 template <typename ContextT>
 void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
-    const BlockT &JoinBlock) {
+    const BlockT &JoinBlock, const BlockT &DivTermBlock,
+    const DivergenceDescriptorT &DivDesc) {
   LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
                     << "\n");
   for (const auto &Phi : JoinBlock.phis()) {
-    // FIXME: The non-undef value is not constant per se; it just happens to be
-    // uniform and may not dominate this PHI. So assuming that the same value
-    // reaches along all incoming edges may itself be undefined behaviour. This
-    // particular interpretation of the undef value was added to
-    // DivergenceAnalysis in the following review:
-    //
-    // https://reviews.llvm.org/D19013
-    if (ContextT::isConstantOrUndefValuePhi(Phi))
+    // Attempt to maintain uniformity for PHIs by considering control
+    // dependencies before marking them.
+    SmallVector<ConstValueRefT> Values;
+    SmallVector<const BlockT *> Blocks;
+    Context.getPhiInputs(Phi, Values, Blocks);
+    assert(Blocks.size() == Values.size());
+
+    // Allow an empty Blocks/Values list to signify getPhiInputs is not
+    // implemented; in which case no uniformity is possible.
+    bool Uniform = !Values.empty();
+
+    std::optional<ConstValueRefT> CommonValue;
+    for (unsigned I = 0; I < Blocks.size() && Uniform; ++I) {
+      // FIXME: We assume undefs are uniform and/or do not dominate the PHI
+      // in the presence of other constant or uniform values.
+      // This particular interpretation of the undef value was added to
+      // DivergenceAnalysis in the following review:
+      //
+      // https://reviews.llvm.org/D19013
+      if (!Values[I])
+        continue;
+
+      // Only consider predecessors on divergent path.
+      if (Blocks[I] != &DivTermBlock &&
+          !DivDesc.BlockLabels.lookup_or(Blocks[I], nullptr))
+        continue;
+
+      // Phi uniformity is maintained if all values on divergent path match.
+      if (!CommonValue)
+        CommonValue = Values[I];
+      else if (Values[I] != *CommonValue)
+        Uniform = false;
+    }
+    if (Uniform)
       continue;
+
+    LLVM_DEBUG(dbgs() << "tainted: " << Phi << "\n");
     markDivergent(Phi);
   }
 }
@@ -1087,7 +1118,7 @@ void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence(
       DivCycles.push_back(Outermost);
       continue;
     }
-    taintAndPushPhiNodes(*JoinBlock);
+    taintAndPushPhiNodes(*JoinBlock, *DivTermBlock, DivDesc);
   }
 
   // Sort by order of decreasing depth. This allows later cycles to be skipped

diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp b/llvm/lib/CodeGen/MachineSSAContext.cpp
@@ -84,6 +84,26 @@ bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) {
   return true;
 }
 
+template <>
+void MachineSSAContext::getPhiInputs(
+    const MachineInstr &Phi, SmallVectorImpl<Register> &Values,
+    SmallVectorImpl<const MachineBasicBlock *> &Blocks) const {
+  if (!Phi.isPHI())
+    return;
+
+  const MachineRegisterInfo &MRI = Phi.getMF()->getRegInfo();
+  // const Register DstReg = Phi.getOperand(0).getReg();
+  for (unsigned Idx = 1, End = Phi.getNumOperands(); Idx < End; Idx += 2) {
+    Register Incoming = Phi.getOperand(Idx).getReg();
+    MachineInstr *Def = MRI.getVRegDef(Incoming);
+    // FIXME: should this also consider Incoming == DstReg undef?
+    if (Def && isUndef(*Def))
+      Incoming = ValueRefNull;
+    Values.push_back(Incoming);
+    Blocks.push_back(Phi.getOperand(Idx + 1).getMBB());
+  }
+}
+
 template <>
 Intrinsic::ID MachineSSAContext::getIntrinsicID(const MachineInstr &MI) {
   if (auto *GI = dyn_cast<GIntrinsic>(&MI))

diff --git a/llvm/lib/IR/SSAContext.cpp b/llvm/lib/IR/SSAContext.cpp
@@ -14,6 +14,7 @@
 
 #include "llvm/IR/SSAContext.h"
 #include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
@@ -68,6 +69,23 @@ bool SSAContext::isConstantOrUndefValuePhi(const Instruction &Instr) {
   return false;
 }
 
+template <>
+void SSAContext::getPhiInputs(
+    const Instruction &Instr, SmallVectorImpl<const Value *> &Values,
+    SmallVectorImpl<const BasicBlock *> &Blocks) const {
+  if (auto *Phi = dyn_cast<PHINode>(&Instr)) {
+    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
+      const Value *Incoming = Phi->getIncomingValue(I);
+      const BasicBlock *Block = Phi->getIncomingBlock(I);
+      // FIXME: should this also consider Incoming == &Instr undef?
+      if (isa<UndefValue>(Incoming))
+        Incoming = ValueRefNull;
+      Values.push_back(Incoming);
+      Blocks.push_back(Block);
+    }
+  }
+}
+
 template <> Intrinsic::ID SSAContext::getIntrinsicID(const Instruction &I) {
   if (auto *CB = dyn_cast<CallBase>(&I))
     return CB->getIntrinsicID();

diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_loopdiverge.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_loopdiverge.ll
@@ -172,9 +172,9 @@ X:
 ; CHECK: DIVERGENT: %div.merge.x =
 
 Y:
-  %div.merge.y = phi i32 [ 42, %X ], [ %b, %B ]
+  %merge.y = phi i32 [ 42, %X ], [ %b, %B ]
   ret void
-; CHECK: DIVERGENT: %div.merge.y =
+; CHECK-NOT: DIVERGENT: %merge.y =
 }
 
 ; divergent loop (G<header>, L<exiting to D>) contained inside a uniform loop (H<header>, B, G, L , D<exiting to x>)

diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/phi_no_divergence.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/phi_no_divergence.ll
@@ -0,0 +1,114 @@
+; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s
+
+define amdgpu_kernel void @no_divergent_exit1(i32 %a, i32 %b, i32 %c) #0 {
+; CHECK-LABEL: for function 'no_divergent_exit1'
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %div.cond = icmp slt i32 %tid, 0
+; CHECK: DIVERGENT: %div.cond =
+  br label %header
+
+header:
+  %loop.b = phi i32 [ %b, %entry ], [ %new.b, %body.1 ], [ %new.b, %body.2 ]
+; CHECK-NOT: DIVERGENT: %loop.b =
+  %loop.c = phi i32 [ %c, %entry ], [ %loop.c, %body.1 ], [ %new.c, %body.2 ]
+; CHECK: DIVERGENT: %loop.c =
+  %exit.val = phi i32 [ %a, %entry ], [ %next.exit.val, %body.1 ], [ %next.exit.val, %body.2 ]
+; CHECK-NOT: DIVERGENT: %exit.val =
+  %exit.cond = icmp slt i32 %exit.val, 42
+; CHECK-NOT: DIVERGENT: %exit.cond =
+  br i1 %exit.cond, label %end, label %body.1
+; CHECK-NOT: DIVERGENT: br i1 %exit.cond,
+
+body.1:
+  %new.b = add i32 %loop.b, 1
+; CHECK-NOT: DIVERGENT: %new.b =
+  %next.exit.val = add i32 %exit.val, 1
+; CHECK-NOT: DIVERGENT: %next.exit.val =
+  br i1 %div.cond, label %body.2, label %header
+; CHECK: DIVERGENT: br i1 %div.cond,
+
+body.2:
+  %new.c = add i32 %loop.c, 1
+; CHECK: DIVERGENT: %new.c =
+  br label %header
+
+end:
+  ret void
+}
+
+define amdgpu_kernel void @no_divergent_exit2(i32 %a, i32 %b, i32 %c) #0 {
+; CHECK-LABEL: for function 'no_divergent_exit2'
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %div.cond = icmp slt i32 %tid, 0
+; CHECK: DIVERGENT: %div.cond =
+  br label %header
+
+header:
+  %loop.b = phi i32 [ %b, %entry ], [ %merge.b, %merge ]
+; CHECK-NOT: DIVERGENT: %loop.b =
+  %loop.c = phi i32 [ %c, %entry ], [ %merge.c, %merge ]
+; CHECK: DIVERGENT: %loop.c =
+  %exit.val = phi i32 [ %a, %entry ], [ %next.exit.val, %merge ]
+; CHECK-NOT: DIVERGENT: %exit.val =
+  %exit.cond = icmp slt i32 %exit.val, 42
+; CHECK-NOT: DIVERGENT: %exit.cond =
+  br i1 %exit.cond, label %end, label %body.1
+; CHECK-NOT: DIVERGENT: br i1 %exit.cond,
+
+body.1:
+  %new.b = add i32 %loop.b, 1
+; CHECK-NOT: DIVERGENT: %new.b =
+  %next.exit.val = add i32 %exit.val, 1
+; CHECK-NOT: DIVERGENT: %next.exit.val =
+  br i1 %div.cond, label %body.2, label %merge
+; CHECK: DIVERGENT: br i1 %div.cond,
+
+body.2:
+  %new.c = add i32 %loop.c, 1
+; CHECK: DIVERGENT: %new.c =
+  br label %merge
+
+merge:
+  %merge.b = phi i32 [ %new.b, %body.1 ], [ %new.b, %body.2 ]
+; CHECK-NOT: DIVERGENT: %merge.b =
+  %merge.c = phi i32 [ %loop.c, %body.1 ], [ %new.c, %body.2 ]
+; CHECK: DIVERGENT: %merge.c =
+  br label %header
+
+end:
+  ret void
+}
+
+define amdgpu_kernel void @no_loop_phi_divergence(i32 %a) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %uni.cond = icmp slt i32 %a, 0
+; CHECK-NOT: DIVERGENT: %uni.cond =
+  %div.cond = icmp slt i32 %tid, 0
+; CHECK: DIVERGENT: %div.cond =
+  br i1 %uni.cond, label %div.branch.block, label %merge
+; CHECK-NOT: DIVERGENT: br i1 %uni.cond,
+
+div.branch.block:
+  br i1 %div.cond, label %div.block.1, label %div.block.2
+; CHECK: DIVERGENT: br i1 %div.cond,
+
+div.block.1:
+  br label %merge
+
+div.block.2:
+  br label %merge
+
+merge:
+  %uni.val = phi i32 [ 0, %entry ], [ 1, %div.block.1 ], [ 1, %div.block.2 ]
+; CHECK-NOT: DIVERGENT: %uni.val =
+  %div.val = phi i32 [ 0, %entry ], [ 1, %div.block.1 ], [ 2, %div.block.2 ]
+; CHECK: DIVERGENT: %div.val =
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone }