Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion llvm/include/llvm/ADT/GenericSSAContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ template <typename _FunctionT> class GenericSSAContext {

// The null value for ValueRefT. For LLVM IR and MIR, this is simply the
// default constructed value.
static constexpr ValueRefT *ValueRefNull = {};
static constexpr ValueRefT ValueRefNull = {};

// An InstructionT usually defines one or more ValueT objects.
using InstructionT = typename SSATraits::InstructionT;
Expand Down Expand Up @@ -96,6 +96,10 @@ template <typename _FunctionT> class GenericSSAContext {
static bool isConstantOrUndefValuePhi(const InstructionT &Instr);
const BlockT *getDefBlock(ConstValueRefT value) const;

void getPhiInputs(const InstructionT &Instr,
SmallVectorImpl<ConstValueRefT> &Values,
SmallVectorImpl<const BlockT *> &Blocks) const;

Printable print(const BlockT *block) const;
Printable printAsOperand(const BlockT *BB) const;
Printable print(const InstructionT *inst) const;
Expand Down
51 changes: 46 additions & 5 deletions llvm/include/llvm/ADT/GenericUniformityImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -455,9 +455,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
/// the worklist.
void taintAndPushAllDefs(const BlockT &JoinBlock);

/// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
/// the worklist.
void taintAndPushPhiNodes(const BlockT &JoinBlock);
/// \brief Mark phi nodes in \p JoinBlock as divergent and push them on
/// the worklist if they are divergent over the by the path \p JoinBlock
/// to \p DivTermBlock.
void taintAndPushPhiNodes(const BlockT &JoinBlock, const BlockT &DivTermBlock,
const DivergenceDescriptorT &DivDesc);

/// \brief Identify all Instructions that become divergent because \p DivExit
/// is a divergent cycle exit of \p DivCycle. Mark those instructions as
Expand Down Expand Up @@ -917,7 +919,8 @@ void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
/// Mark divergent phi nodes in a join block
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
const BlockT &JoinBlock) {
const BlockT &JoinBlock, const BlockT &DivTermBlock,
const DivergenceDescriptorT &DivDesc) {
LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
<< "\n");
for (const auto &Phi : JoinBlock.phis()) {
Expand All @@ -930,6 +933,44 @@ void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
// https://reviews.llvm.org/D19013
if (ContextT::isConstantOrUndefValuePhi(Phi))
continue;

// Attempt to maintain uniformity for PHIs by considering control
// dependencies.
SmallVector<ConstValueRefT> Values;
SmallVector<const BlockT *> Blocks;
Context.getPhiInputs(Phi, Values, Blocks);
assert(Blocks.size() == Values.size());

// Allow an empty Blocks/Values list to signify getPhiInputs is not
// implemented; in which case no uniformity is possible.
bool Uniform = !Values.empty();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't we just exit early at this point instead of going through the for-loop below over all incoming blocks?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Loop is already predicated on Uniform so it won't go through blocks in this case.


std::optional<ConstValueRefT> CommonValue;
for (unsigned I = 0; I < Blocks.size() && Uniform; ++I) {
if (DivDesc.CycleDivBlocks.contains(Blocks[I])) {
// If PHI is reachable via divergent exit it is divergent.
Uniform = false;
} else if (DT.dominates(&DivTermBlock, Blocks[I]) ||
DivDesc.BlockLabels.lookup_or(Blocks[I], nullptr)) {
// If all edges from the marked path share a common value then
// uniformity is preserved when the value is itself uniform.
if (!CommonValue)
CommonValue = Values[I];
else
Uniform = Values[I] == *CommonValue;
}
// Ignore undefined values when checking definitions.
if (!Values[I])
continue;
// Any value defined on the divergent path is divergent.
const BlockT *DefBlock = Context.getDefBlock(Values[I]);
if (DivDesc.BlockLabels.lookup_or(DefBlock, nullptr))
Uniform = false;
}
if (Uniform)
continue;

LLVM_DEBUG(dbgs() << "tainted: " << Phi << "\n");
markDivergent(Phi);
}
}
Expand Down Expand Up @@ -1087,7 +1128,7 @@ void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence(
DivCycles.push_back(Outermost);
continue;
}
taintAndPushPhiNodes(*JoinBlock);
taintAndPushPhiNodes(*JoinBlock, *DivTermBlock, DivDesc);
}

// Sort by order of decreasing depth. This allows later cycles to be skipped
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/CodeGen/MachineSSAContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,20 @@ bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) {
return true;
}

template <>
void MachineSSAContext::getPhiInputs(
const MachineInstr &Phi, SmallVectorImpl<Register> &Values,
SmallVectorImpl<const MachineBasicBlock *> &Blocks) const {
if (!Phi.isPHI())
return;
for (unsigned Idx = 1, End = Phi.getNumOperands(); Idx < End; Idx += 2) {
// FIXME: ideally we would turn undef values into ValueRefNull.
// This could reduce number of PHIs marked in taintAndPushPhiNodes().
Values.push_back(Phi.getOperand(Idx).getReg());
Blocks.push_back(Phi.getOperand(Idx + 1).getMBB());
}
}

template <>
Intrinsic::ID MachineSSAContext::getIntrinsicID(const MachineInstr &MI) {
if (auto *GI = dyn_cast<GIntrinsic>(&MI))
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/IR/SSAContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "llvm/IR/SSAContext.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
Expand Down Expand Up @@ -68,6 +69,20 @@ bool SSAContext::isConstantOrUndefValuePhi(const Instruction &Instr) {
return false;
}

template <>
void SSAContext::getPhiInputs(
const Instruction &Instr, SmallVectorImpl<const Value *> &Values,
SmallVectorImpl<const BasicBlock *> &Blocks) const {
if (auto *Phi = dyn_cast<PHINode>(&Instr)) {
for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
const Value *Incoming = Phi->getIncomingValue(I);
const BasicBlock *Block = Phi->getIncomingBlock(I);
Values.push_back(!isa<UndefValue>(Incoming) ? Incoming : ValueRefNull);
Blocks.push_back(Block);
}
}
}

template <> Intrinsic::ID SSAContext::getIntrinsicID(const Instruction &I) {
if (auto *CB = dyn_cast<CallBase>(&I))
return CB->getIntrinsicID();
Expand Down
114 changes: 114 additions & 0 deletions llvm/test/Analysis/UniformityAnalysis/AMDGPU/phi_no_divergence.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be useful to have one or two lines explaining what's the distinguishing pattern in each function compared to other functions in the file.

define amdgpu_kernel void @no_divergent_exit1(i32 %a, i32 %b, i32 %c) #0 {
; CHECK-LABEL: for function 'no_divergent_exit1'
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%div.cond = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: %div.cond =
br label %header

header:
%loop.b = phi i32 [ %b, %entry ], [ %new.b, %body.1 ], [ %new.b, %body.2 ]
; CHECK-NOT: DIVERGENT: %loop.b =
%loop.c = phi i32 [ %c, %entry ], [ %loop.c, %body.1 ], [ %new.c, %body.2 ]
; CHECK: DIVERGENT: %loop.c =
%exit.val = phi i32 [ %a, %entry ], [ %next.exit.val, %body.1 ], [ %next.exit.val, %body.2 ]
; CHECK-NOT: DIVERGENT: %exit.val =
%exit.cond = icmp slt i32 %exit.val, 42
; CHECK-NOT: DIVERGENT: %exit.cond =
br i1 %exit.cond, label %end, label %body.1
; CHECK-NOT: DIVERGENT: br i1 %exit.cond,

body.1:
%new.b = add i32 %loop.b, 1
; CHECK-NOT: DIVERGENT: %new.b =
%next.exit.val = add i32 %exit.val, 1
; CHECK-NOT: DIVERGENT: %next.exit.val =
br i1 %div.cond, label %body.2, label %header
; CHECK: DIVERGENT: br i1 %div.cond,

body.2:
%new.c = add i32 %loop.c, 1
; CHECK: DIVERGENT: %new.c =
br label %header

end:
ret void
}

define amdgpu_kernel void @no_divergent_exit2(i32 %a, i32 %b, i32 %c) #0 {
; CHECK-LABEL: for function 'no_divergent_exit2'
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%div.cond = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: %div.cond =
br label %header

header:
%loop.b = phi i32 [ %b, %entry ], [ %merge.b, %merge ]
; CHECK-NOT: DIVERGENT: %loop.b =
%loop.c = phi i32 [ %c, %entry ], [ %merge.c, %merge ]
; CHECK: DIVERGENT: %loop.c =
%exit.val = phi i32 [ %a, %entry ], [ %next.exit.val, %merge ]
; CHECK-NOT: DIVERGENT: %exit.val =
%exit.cond = icmp slt i32 %exit.val, 42
; CHECK-NOT: DIVERGENT: %exit.cond =
br i1 %exit.cond, label %end, label %body.1
; CHECK-NOT: DIVERGENT: br i1 %exit.cond,

body.1:
%new.b = add i32 %loop.b, 1
; CHECK-NOT: DIVERGENT: %new.b =
%next.exit.val = add i32 %exit.val, 1
; CHECK-NOT: DIVERGENT: %next.exit.val =
br i1 %div.cond, label %body.2, label %merge
; CHECK: DIVERGENT: br i1 %div.cond,

body.2:
%new.c = add i32 %loop.c, 1
; CHECK: DIVERGENT: %new.c =
br label %merge

merge:
%merge.b = phi i32 [ %new.b, %body.1 ], [ %new.b, %body.2 ]
; CHECK-NOT: DIVERGENT: %merge.b =
%merge.c = phi i32 [ %loop.c, %body.1 ], [ %new.c, %body.2 ]
; CHECK: DIVERGENT: %merge.c =
br label %header

end:
ret void
}

define amdgpu_kernel void @no_loop_phi_divergence(i32 %a) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%uni.cond = icmp slt i32 %a, 0
; CHECK-NOT: DIVERGENT: %uni.cond =
%div.cond = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: %div.cond =
br i1 %uni.cond, label %div.branch.block, label %merge
; CHECK-NOT: DIVERGENT: br i1 %uni.cond,

div.branch.block:
br i1 %div.cond, label %div.block.1, label %div.block.2
; CHECK: DIVERGENT: br i1 %div.cond,

div.block.1:
br label %merge

div.block.2:
br label %merge

merge:
%uni.val = phi i32 [ 0, %entry ], [ 1, %div.block.1 ], [ 1, %div.block.2 ]
; CHECK-NOT: DIVERGENT: %uni.val =
%div.val = phi i32 [ 0, %entry ], [ 1, %div.block.1 ], [ 2, %div.block.2 ]
; CHECK: DIVERGENT: %div.val =
ret void
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a test with temporal divergence?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the existing tests cover this.

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }