-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[Uniformity] Avoid marking all PHIs as divergent in join blocks #157808
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
7c44e3b
2804480
ca07077
f5fdd89
d2f469a
ce186d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -455,9 +455,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl { | |
/// the worklist. | ||
void taintAndPushAllDefs(const BlockT &JoinBlock); | ||
|
||
/// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on | ||
/// the worklist. | ||
void taintAndPushPhiNodes(const BlockT &JoinBlock); | ||
/// \brief Mark phi nodes in \p JoinBlock as divergent and push them on | ||
/// the worklist if they are divergent over the by the path \p JoinBlock | ||
perlfu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
/// to \p DivTermBlock. | ||
void taintAndPushPhiNodes(const BlockT &JoinBlock, const BlockT &DivTermBlock, | ||
const DivergenceDescriptorT &DivDesc); | ||
|
||
/// \brief Identify all Instructions that become divergent because \p DivExit | ||
/// is a divergent cycle exit of \p DivCycle. Mark those instructions as | ||
|
@@ -917,7 +919,8 @@ void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs( | |
/// Mark divergent phi nodes in a join block | ||
template <typename ContextT> | ||
void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes( | ||
const BlockT &JoinBlock) { | ||
const BlockT &JoinBlock, const BlockT &DivTermBlock, | ||
const DivergenceDescriptorT &DivDesc) { | ||
LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock) | ||
<< "\n"); | ||
for (const auto &Phi : JoinBlock.phis()) { | ||
|
@@ -930,6 +933,44 @@ void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes( | |
// https://reviews.llvm.org/D19013 | ||
if (ContextT::isConstantOrUndefValuePhi(Phi)) | ||
continue; | ||
|
||
// Attempt to maintain uniformity for PHIs by considering control | ||
// dependencies. | ||
SmallVector<ConstValueRefT> Values; | ||
SmallVector<const BlockT *> Blocks; | ||
Context.getPhiInputs(Phi, Values, Blocks); | ||
assert(Blocks.size() == Values.size()); | ||
|
||
// Allow an empty Blocks/Values list to signify getPhiInputs is not | ||
// implemented; in which case no uniformity is possible. | ||
bool Uniform = !Values.empty(); | ||
|
||
|
||
std::optional<ConstValueRefT> CommonValue; | ||
for (unsigned I = 0; I < Blocks.size() && Uniform; ++I) { | ||
if (DivDesc.CycleDivBlocks.contains(Blocks[I])) { | ||
// If PHI is reachable via divergent exit it is divergent. | ||
Uniform = false; | ||
} else if (DT.dominates(&DivTermBlock, Blocks[I]) || | ||
perlfu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
DivDesc.BlockLabels.lookup_or(Blocks[I], nullptr)) { | ||
// If all edges from the marked path share a common value then | ||
// uniformity is preserved when the value is itself uniform. | ||
if (!CommonValue) | ||
CommonValue = Values[I]; | ||
else | ||
Uniform = Values[I] == *CommonValue; | ||
perlfu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
} | ||
// Ignore undefined values when checking definitions. | ||
if (!Values[I]) | ||
perlfu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
continue; | ||
// Any value defined on the divergent path is divergent. | ||
const BlockT *DefBlock = Context.getDefBlock(Values[I]); | ||
if (DivDesc.BlockLabels.lookup_or(DefBlock, nullptr)) | ||
Uniform = false; | ||
perlfu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
} | ||
if (Uniform) | ||
continue; | ||
|
||
LLVM_DEBUG(dbgs() << "tainted: " << Phi << "\n"); | ||
markDivergent(Phi); | ||
} | ||
} | ||
|
@@ -1087,7 +1128,7 @@ void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence( | |
DivCycles.push_back(Outermost); | ||
continue; | ||
} | ||
taintAndPushPhiNodes(*JoinBlock); | ||
taintAndPushPhiNodes(*JoinBlock, *DivTermBlock, DivDesc); | ||
} | ||
|
||
// Sort by order of decreasing depth. This allows later cycles to be skipped | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be useful to have one or two lines explaining what's the distinguishing pattern in each function compared to other functions in the file. |
||
define amdgpu_kernel void @no_divergent_exit1(i32 %a, i32 %b, i32 %c) #0 { | ||
; CHECK-LABEL: for function 'no_divergent_exit1' | ||
entry: | ||
%tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
%div.cond = icmp slt i32 %tid, 0 | ||
; CHECK: DIVERGENT: %div.cond = | ||
br label %header | ||
|
||
header: | ||
%loop.b = phi i32 [ %b, %entry ], [ %new.b, %body.1 ], [ %new.b, %body.2 ] | ||
; CHECK-NOT: DIVERGENT: %loop.b = | ||
%loop.c = phi i32 [ %c, %entry ], [ %loop.c, %body.1 ], [ %new.c, %body.2 ] | ||
; CHECK: DIVERGENT: %loop.c = | ||
%exit.val = phi i32 [ %a, %entry ], [ %next.exit.val, %body.1 ], [ %next.exit.val, %body.2 ] | ||
; CHECK-NOT: DIVERGENT: %exit.val = | ||
%exit.cond = icmp slt i32 %exit.val, 42 | ||
; CHECK-NOT: DIVERGENT: %exit.cond = | ||
br i1 %exit.cond, label %end, label %body.1 | ||
; CHECK-NOT: DIVERGENT: br i1 %exit.cond, | ||
|
||
body.1: | ||
%new.b = add i32 %loop.b, 1 | ||
; CHECK-NOT: DIVERGENT: %new.b = | ||
%next.exit.val = add i32 %exit.val, 1 | ||
; CHECK-NOT: DIVERGENT: %next.exit.val = | ||
br i1 %div.cond, label %body.2, label %header | ||
; CHECK: DIVERGENT: br i1 %div.cond, | ||
|
||
body.2: | ||
%new.c = add i32 %loop.c, 1 | ||
; CHECK: DIVERGENT: %new.c = | ||
br label %header | ||
|
||
end: | ||
ret void | ||
} | ||
|
||
define amdgpu_kernel void @no_divergent_exit2(i32 %a, i32 %b, i32 %c) #0 { | ||
; CHECK-LABEL: for function 'no_divergent_exit2' | ||
entry: | ||
%tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
%div.cond = icmp slt i32 %tid, 0 | ||
; CHECK: DIVERGENT: %div.cond = | ||
br label %header | ||
|
||
header: | ||
%loop.b = phi i32 [ %b, %entry ], [ %merge.b, %merge ] | ||
; CHECK-NOT: DIVERGENT: %loop.b = | ||
%loop.c = phi i32 [ %c, %entry ], [ %merge.c, %merge ] | ||
; CHECK: DIVERGENT: %loop.c = | ||
%exit.val = phi i32 [ %a, %entry ], [ %next.exit.val, %merge ] | ||
; CHECK-NOT: DIVERGENT: %exit.val = | ||
%exit.cond = icmp slt i32 %exit.val, 42 | ||
; CHECK-NOT: DIVERGENT: %exit.cond = | ||
br i1 %exit.cond, label %end, label %body.1 | ||
; CHECK-NOT: DIVERGENT: br i1 %exit.cond, | ||
|
||
body.1: | ||
%new.b = add i32 %loop.b, 1 | ||
; CHECK-NOT: DIVERGENT: %new.b = | ||
%next.exit.val = add i32 %exit.val, 1 | ||
; CHECK-NOT: DIVERGENT: %next.exit.val = | ||
br i1 %div.cond, label %body.2, label %merge | ||
; CHECK: DIVERGENT: br i1 %div.cond, | ||
|
||
body.2: | ||
%new.c = add i32 %loop.c, 1 | ||
; CHECK: DIVERGENT: %new.c = | ||
br label %merge | ||
|
||
merge: | ||
%merge.b = phi i32 [ %new.b, %body.1 ], [ %new.b, %body.2 ] | ||
; CHECK-NOT: DIVERGENT: %merge.b = | ||
%merge.c = phi i32 [ %loop.c, %body.1 ], [ %new.c, %body.2 ] | ||
; CHECK: DIVERGENT: %merge.c = | ||
br label %header | ||
|
||
end: | ||
ret void | ||
} | ||
|
||
define amdgpu_kernel void @no_loop_phi_divergence(i32 %a) #0 { | ||
entry: | ||
%tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
%uni.cond = icmp slt i32 %a, 0 | ||
; CHECK-NOT: DIVERGENT: %uni.cond = | ||
%div.cond = icmp slt i32 %tid, 0 | ||
; CHECK: DIVERGENT: %div.cond = | ||
br i1 %uni.cond, label %div.branch.block, label %merge | ||
; CHECK-NOT: DIVERGENT: br i1 %uni.cond, | ||
|
||
div.branch.block: | ||
br i1 %div.cond, label %div.block.1, label %div.block.2 | ||
; CHECK: DIVERGENT: br i1 %div.cond, | ||
|
||
div.block.1: | ||
br label %merge | ||
|
||
div.block.2: | ||
br label %merge | ||
|
||
merge: | ||
%uni.val = phi i32 [ 0, %entry ], [ 1, %div.block.1 ], [ 1, %div.block.2 ] | ||
; CHECK-NOT: DIVERGENT: %uni.val = | ||
%div.val = phi i32 [ 0, %entry ], [ 1, %div.block.1 ], [ 2, %div.block.2 ] | ||
; CHECK: DIVERGENT: %div.val = | ||
ret void | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need a test with temporal divergence? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the existing tests cover this. |
||
declare i32 @llvm.amdgcn.workitem.id.x() #0 | ||
|
||
attributes #0 = { nounwind readnone } |
Uh oh!
There was an error while loading. Please reload this page.