Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion llvm/include/llvm/ADT/GenericSSAContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ template <typename _FunctionT> class GenericSSAContext {

// The null value for ValueRefT. For LLVM IR and MIR, this is simply the
// default constructed value.
static constexpr ValueRefT *ValueRefNull = {};
static constexpr ValueRefT ValueRefNull = {};

// An InstructionT usually defines one or more ValueT objects.
using InstructionT = typename SSATraits::InstructionT;
Expand Down Expand Up @@ -96,6 +96,10 @@ template <typename _FunctionT> class GenericSSAContext {
static bool isConstantOrUndefValuePhi(const InstructionT &Instr);
const BlockT *getDefBlock(ConstValueRefT value) const;

void getPhiInputs(const InstructionT &Instr,
SmallVectorImpl<ConstValueRefT> &Values,
SmallVectorImpl<const BlockT *> &Blocks) const;

Printable print(const BlockT *block) const;
Printable printAsOperand(const BlockT *BB) const;
Printable print(const InstructionT *inst) const;
Expand Down
57 changes: 44 additions & 13 deletions llvm/include/llvm/ADT/GenericUniformityImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -455,9 +455,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
/// the worklist.
void taintAndPushAllDefs(const BlockT &JoinBlock);

/// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
/// the worklist.
void taintAndPushPhiNodes(const BlockT &JoinBlock);
/// \brief Mark phi nodes in \p JoinBlock as divergent and push them on
/// the worklist if they are divergent over the path from \p JoinBlock
/// to \p DivTermBlock.
void taintAndPushPhiNodes(const BlockT &JoinBlock, const BlockT &DivTermBlock,
const DivergenceDescriptorT &DivDesc);

/// \brief Identify all Instructions that become divergent because \p DivExit
/// is a divergent cycle exit of \p DivCycle. Mark those instructions as
Expand Down Expand Up @@ -917,19 +919,48 @@ void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
/// Mark divergent phi nodes in a join block
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
const BlockT &JoinBlock) {
const BlockT &JoinBlock, const BlockT &DivTermBlock,
const DivergenceDescriptorT &DivDesc) {
LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
<< "\n");
for (const auto &Phi : JoinBlock.phis()) {
// FIXME: The non-undef value is not constant per se; it just happens to be
// uniform and may not dominate this PHI. So assuming that the same value
// reaches along all incoming edges may itself be undefined behaviour. This
// particular interpretation of the undef value was added to
// DivergenceAnalysis in the following review:
//
// https://reviews.llvm.org/D19013
if (ContextT::isConstantOrUndefValuePhi(Phi))
// Attempt to maintain uniformity for PHIs by considering control
// dependencies before marking them.
SmallVector<ConstValueRefT> Values;
SmallVector<const BlockT *> Blocks;
Context.getPhiInputs(Phi, Values, Blocks);
assert(Blocks.size() == Values.size());

// Allow an empty Blocks/Values list to signify getPhiInputs is not
// implemented; in which case no uniformity is possible.
bool Uniform = !Values.empty();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't we just exit early at this point instead of going through the for-loop below over all incoming blocks?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Loop is already predicated on Uniform so it won't go through blocks in this case.


std::optional<ConstValueRefT> CommonValue;
for (unsigned I = 0; I < Blocks.size() && Uniform; ++I) {
// FIXME: We assume undefs are uniform and/or do not dominate the PHI
// in the presence of other constant or uniform values.
// This particular interpretation of the undef value was added to
// DivergenceAnalysis in the following review:
//
// https://reviews.llvm.org/D19013
if (!Values[I])
continue;

// Only consider predecessors on divergent path.
if (Blocks[I] != &DivTermBlock &&
!DivDesc.BlockLabels.lookup_or(Blocks[I], nullptr))
continue;

// Phi uniformity is maintained if all values on divergent path match.
if (!CommonValue)
CommonValue = Values[I];
else if (Values[I] != *CommonValue)
Uniform = false;
}
if (Uniform)
continue;

LLVM_DEBUG(dbgs() << "tainted: " << Phi << "\n");
markDivergent(Phi);
}
}
Expand Down Expand Up @@ -1087,7 +1118,7 @@ void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence(
DivCycles.push_back(Outermost);
continue;
}
taintAndPushPhiNodes(*JoinBlock);
taintAndPushPhiNodes(*JoinBlock, *DivTermBlock, DivDesc);
}

// Sort by order of decreasing depth. This allows later cycles to be skipped
Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/CodeGen/MachineSSAContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,26 @@ bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) {
return true;
}

template <>
void MachineSSAContext::getPhiInputs(
const MachineInstr &Phi, SmallVectorImpl<Register> &Values,
SmallVectorImpl<const MachineBasicBlock *> &Blocks) const {
if (!Phi.isPHI())
return;

const MachineRegisterInfo &MRI = Phi.getMF()->getRegInfo();
// const Register DstReg = Phi.getOperand(0).getReg();
for (unsigned Idx = 1, End = Phi.getNumOperands(); Idx < End; Idx += 2) {
Register Incoming = Phi.getOperand(Idx).getReg();
MachineInstr *Def = MRI.getVRegDef(Incoming);
// FIXME: should this also consider Incoming == DstReg undef?
if (Def && isUndef(*Def))
Incoming = ValueRefNull;
Values.push_back(Incoming);
Blocks.push_back(Phi.getOperand(Idx + 1).getMBB());
}
}

template <>
Intrinsic::ID MachineSSAContext::getIntrinsicID(const MachineInstr &MI) {
if (auto *GI = dyn_cast<GIntrinsic>(&MI))
Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/IR/SSAContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "llvm/IR/SSAContext.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
Expand Down Expand Up @@ -68,6 +69,23 @@ bool SSAContext::isConstantOrUndefValuePhi(const Instruction &Instr) {
return false;
}

template <>
void SSAContext::getPhiInputs(
const Instruction &Instr, SmallVectorImpl<const Value *> &Values,
SmallVectorImpl<const BasicBlock *> &Blocks) const {
if (auto *Phi = dyn_cast<PHINode>(&Instr)) {
for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
const Value *Incoming = Phi->getIncomingValue(I);
const BasicBlock *Block = Phi->getIncomingBlock(I);
// FIXME: should this also consider Incoming == &Instr undef?
if (isa<UndefValue>(Incoming))
Incoming = ValueRefNull;
Values.push_back(Incoming);
Blocks.push_back(Block);
}
}
}

template <> Intrinsic::ID SSAContext::getIntrinsicID(const Instruction &I) {
if (auto *CB = dyn_cast<CallBase>(&I))
return CB->getIntrinsicID();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,9 @@ X:
; CHECK: DIVERGENT: %div.merge.x =

Y:
%div.merge.y = phi i32 [ 42, %X ], [ %b, %B ]
%merge.y = phi i32 [ 42, %X ], [ %b, %B ]
ret void
; CHECK: DIVERGENT: %div.merge.y =
; CHECK-NOT: DIVERGENT: %merge.y =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this change is wrong. Once we go entry->H->G, there is a divergent branch which can choose either G->X->Y or G->L->C->H->B->Y (!). Therefore the choice of whether we arrive at Y from X or from B is divergent.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've fixed this by reinstating my previous code which respected temporal divergence.

}

; divergent loop (G<header>, L<exiting to D>) contained inside a uniform loop (H<header>, B, G, L , D<exiting to x>)
Expand Down
114 changes: 114 additions & 0 deletions llvm/test/Analysis/UniformityAnalysis/AMDGPU/phi_no_divergence.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be useful to have one or two lines explaining what's the distinguishing pattern in each function compared to other functions in the file.

define amdgpu_kernel void @no_divergent_exit1(i32 %a, i32 %b, i32 %c) #0 {
; CHECK-LABEL: for function 'no_divergent_exit1'
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%div.cond = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: %div.cond =
br label %header

header:
%loop.b = phi i32 [ %b, %entry ], [ %new.b, %body.1 ], [ %new.b, %body.2 ]
; CHECK-NOT: DIVERGENT: %loop.b =
%loop.c = phi i32 [ %c, %entry ], [ %loop.c, %body.1 ], [ %new.c, %body.2 ]
; CHECK: DIVERGENT: %loop.c =
%exit.val = phi i32 [ %a, %entry ], [ %next.exit.val, %body.1 ], [ %next.exit.val, %body.2 ]
; CHECK-NOT: DIVERGENT: %exit.val =
%exit.cond = icmp slt i32 %exit.val, 42
; CHECK-NOT: DIVERGENT: %exit.cond =
br i1 %exit.cond, label %end, label %body.1
; CHECK-NOT: DIVERGENT: br i1 %exit.cond,

body.1:
%new.b = add i32 %loop.b, 1
; CHECK-NOT: DIVERGENT: %new.b =
%next.exit.val = add i32 %exit.val, 1
; CHECK-NOT: DIVERGENT: %next.exit.val =
br i1 %div.cond, label %body.2, label %header
; CHECK: DIVERGENT: br i1 %div.cond,

body.2:
%new.c = add i32 %loop.c, 1
; CHECK: DIVERGENT: %new.c =
br label %header

end:
ret void
}

define amdgpu_kernel void @no_divergent_exit2(i32 %a, i32 %b, i32 %c) #0 {
; CHECK-LABEL: for function 'no_divergent_exit2'
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%div.cond = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: %div.cond =
br label %header

header:
%loop.b = phi i32 [ %b, %entry ], [ %merge.b, %merge ]
; CHECK-NOT: DIVERGENT: %loop.b =
%loop.c = phi i32 [ %c, %entry ], [ %merge.c, %merge ]
; CHECK: DIVERGENT: %loop.c =
%exit.val = phi i32 [ %a, %entry ], [ %next.exit.val, %merge ]
; CHECK-NOT: DIVERGENT: %exit.val =
%exit.cond = icmp slt i32 %exit.val, 42
; CHECK-NOT: DIVERGENT: %exit.cond =
br i1 %exit.cond, label %end, label %body.1
; CHECK-NOT: DIVERGENT: br i1 %exit.cond,

body.1:
%new.b = add i32 %loop.b, 1
; CHECK-NOT: DIVERGENT: %new.b =
%next.exit.val = add i32 %exit.val, 1
; CHECK-NOT: DIVERGENT: %next.exit.val =
br i1 %div.cond, label %body.2, label %merge
; CHECK: DIVERGENT: br i1 %div.cond,

body.2:
%new.c = add i32 %loop.c, 1
; CHECK: DIVERGENT: %new.c =
br label %merge

merge:
%merge.b = phi i32 [ %new.b, %body.1 ], [ %new.b, %body.2 ]
; CHECK-NOT: DIVERGENT: %merge.b =
%merge.c = phi i32 [ %loop.c, %body.1 ], [ %new.c, %body.2 ]
; CHECK: DIVERGENT: %merge.c =
br label %header

end:
ret void
}

define amdgpu_kernel void @no_loop_phi_divergence(i32 %a) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%uni.cond = icmp slt i32 %a, 0
; CHECK-NOT: DIVERGENT: %uni.cond =
%div.cond = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: %div.cond =
br i1 %uni.cond, label %div.branch.block, label %merge
; CHECK-NOT: DIVERGENT: br i1 %uni.cond,

div.branch.block:
br i1 %div.cond, label %div.block.1, label %div.block.2
; CHECK: DIVERGENT: br i1 %div.cond,

div.block.1:
br label %merge

div.block.2:
br label %merge

merge:
%uni.val = phi i32 [ 0, %entry ], [ 1, %div.block.1 ], [ 1, %div.block.2 ]
; CHECK-NOT: DIVERGENT: %uni.val =
%div.val = phi i32 [ 0, %entry ], [ 1, %div.block.1 ], [ 2, %div.block.2 ]
; CHECK: DIVERGENT: %div.val =
ret void
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a test with temporal divergence?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the existing tests cover this.

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
Loading