-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[Uniformity] Avoid marking all PHIs as divergent in join blocks #157808
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
7c44e3b
2804480
ca07077
f5fdd89
d2f469a
ce186d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -455,9 +455,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl { | |
/// the worklist. | ||
void taintAndPushAllDefs(const BlockT &JoinBlock); | ||
|
||
/// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on | ||
/// the worklist. | ||
void taintAndPushPhiNodes(const BlockT &JoinBlock); | ||
/// \brief Mark phi nodes in \p JoinBlock as divergent and push them on | ||
/// the worklist if they are divergent over the path from \p JoinBlock | ||
/// to \p DivTermBlock. | ||
void taintAndPushPhiNodes(const BlockT &JoinBlock, const BlockT &DivTermBlock, | ||
const DivergenceDescriptorT &DivDesc); | ||
|
||
/// \brief Identify all Instructions that become divergent because \p DivExit | ||
/// is a divergent cycle exit of \p DivCycle. Mark those instructions as | ||
|
@@ -917,19 +919,48 @@ void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs( | |
/// Mark divergent phi nodes in a join block | ||
template <typename ContextT> | ||
void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes( | ||
const BlockT &JoinBlock) { | ||
const BlockT &JoinBlock, const BlockT &DivTermBlock, | ||
const DivergenceDescriptorT &DivDesc) { | ||
LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock) | ||
<< "\n"); | ||
for (const auto &Phi : JoinBlock.phis()) { | ||
// FIXME: The non-undef value is not constant per se; it just happens to be | ||
// uniform and may not dominate this PHI. So assuming that the same value | ||
// reaches along all incoming edges may itself be undefined behaviour. This | ||
// particular interpretation of the undef value was added to | ||
// DivergenceAnalysis in the following review: | ||
// | ||
// https://reviews.llvm.org/D19013 | ||
if (ContextT::isConstantOrUndefValuePhi(Phi)) | ||
// Attempt to maintain uniformity for PHIs by considering control | ||
// dependencies before marking them. | ||
SmallVector<ConstValueRefT> Values; | ||
SmallVector<const BlockT *> Blocks; | ||
Context.getPhiInputs(Phi, Values, Blocks); | ||
assert(Blocks.size() == Values.size()); | ||
|
||
// Allow an empty Blocks/Values list to signify getPhiInputs is not | ||
// implemented; in which case no uniformity is possible. | ||
bool Uniform = !Values.empty(); | ||
|
||
|
||
std::optional<ConstValueRefT> CommonValue; | ||
for (unsigned I = 0; I < Blocks.size() && Uniform; ++I) { | ||
// FIXME: We assume undefs are uniform and/or do not dominate the PHI | ||
// in the presence of other constant or uniform values. | ||
// This particular interpretation of the undef value was added to | ||
// DivergenceAnalysis in the following review: | ||
// | ||
// https://reviews.llvm.org/D19013 | ||
if (!Values[I]) | ||
perlfu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
continue; | ||
|
||
// Only consider predecessors on divergent path. | ||
if (Blocks[I] != &DivTermBlock && | ||
!DivDesc.BlockLabels.lookup_or(Blocks[I], nullptr)) | ||
continue; | ||
|
||
// Phi uniformity is maintained if all values on divergent path match. | ||
if (!CommonValue) | ||
CommonValue = Values[I]; | ||
else if (Values[I] != *CommonValue) | ||
Uniform = false; | ||
} | ||
if (Uniform) | ||
continue; | ||
|
||
LLVM_DEBUG(dbgs() << "tainted: " << Phi << "\n"); | ||
markDivergent(Phi); | ||
} | ||
} | ||
|
@@ -1087,7 +1118,7 @@ void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence( | |
DivCycles.push_back(Outermost); | ||
continue; | ||
} | ||
taintAndPushPhiNodes(*JoinBlock); | ||
taintAndPushPhiNodes(*JoinBlock, *DivTermBlock, DivDesc); | ||
} | ||
|
||
// Sort by order of decreasing depth. This allows later cycles to be skipped | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -172,9 +172,9 @@ X: | |
; CHECK: DIVERGENT: %div.merge.x = | ||
|
||
Y: | ||
%div.merge.y = phi i32 [ 42, %X ], [ %b, %B ] | ||
%merge.y = phi i32 [ 42, %X ], [ %b, %B ] | ||
ret void | ||
; CHECK: DIVERGENT: %div.merge.y = | ||
; CHECK-NOT: DIVERGENT: %merge.y = | ||
|
||
} | ||
|
||
; divergent loop (G<header>, L<exiting to D>) contained inside a uniform loop (H<header>, B, G, L , D<exiting to x>) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be useful to have one or two lines explaining what's the distinguishing pattern in each function compared to other functions in the file. |
||
define amdgpu_kernel void @no_divergent_exit1(i32 %a, i32 %b, i32 %c) #0 { | ||
; CHECK-LABEL: for function 'no_divergent_exit1' | ||
entry: | ||
%tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
%div.cond = icmp slt i32 %tid, 0 | ||
; CHECK: DIVERGENT: %div.cond = | ||
br label %header | ||
|
||
header: | ||
%loop.b = phi i32 [ %b, %entry ], [ %new.b, %body.1 ], [ %new.b, %body.2 ] | ||
; CHECK-NOT: DIVERGENT: %loop.b = | ||
%loop.c = phi i32 [ %c, %entry ], [ %loop.c, %body.1 ], [ %new.c, %body.2 ] | ||
; CHECK: DIVERGENT: %loop.c = | ||
%exit.val = phi i32 [ %a, %entry ], [ %next.exit.val, %body.1 ], [ %next.exit.val, %body.2 ] | ||
; CHECK-NOT: DIVERGENT: %exit.val = | ||
%exit.cond = icmp slt i32 %exit.val, 42 | ||
; CHECK-NOT: DIVERGENT: %exit.cond = | ||
br i1 %exit.cond, label %end, label %body.1 | ||
; CHECK-NOT: DIVERGENT: br i1 %exit.cond, | ||
|
||
body.1: | ||
%new.b = add i32 %loop.b, 1 | ||
; CHECK-NOT: DIVERGENT: %new.b = | ||
%next.exit.val = add i32 %exit.val, 1 | ||
; CHECK-NOT: DIVERGENT: %next.exit.val = | ||
br i1 %div.cond, label %body.2, label %header | ||
; CHECK: DIVERGENT: br i1 %div.cond, | ||
|
||
body.2: | ||
%new.c = add i32 %loop.c, 1 | ||
; CHECK: DIVERGENT: %new.c = | ||
br label %header | ||
|
||
end: | ||
ret void | ||
} | ||
|
||
define amdgpu_kernel void @no_divergent_exit2(i32 %a, i32 %b, i32 %c) #0 { | ||
; CHECK-LABEL: for function 'no_divergent_exit2' | ||
entry: | ||
%tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
%div.cond = icmp slt i32 %tid, 0 | ||
; CHECK: DIVERGENT: %div.cond = | ||
br label %header | ||
|
||
header: | ||
%loop.b = phi i32 [ %b, %entry ], [ %merge.b, %merge ] | ||
; CHECK-NOT: DIVERGENT: %loop.b = | ||
%loop.c = phi i32 [ %c, %entry ], [ %merge.c, %merge ] | ||
; CHECK: DIVERGENT: %loop.c = | ||
%exit.val = phi i32 [ %a, %entry ], [ %next.exit.val, %merge ] | ||
; CHECK-NOT: DIVERGENT: %exit.val = | ||
%exit.cond = icmp slt i32 %exit.val, 42 | ||
; CHECK-NOT: DIVERGENT: %exit.cond = | ||
br i1 %exit.cond, label %end, label %body.1 | ||
; CHECK-NOT: DIVERGENT: br i1 %exit.cond, | ||
|
||
body.1: | ||
%new.b = add i32 %loop.b, 1 | ||
; CHECK-NOT: DIVERGENT: %new.b = | ||
%next.exit.val = add i32 %exit.val, 1 | ||
; CHECK-NOT: DIVERGENT: %next.exit.val = | ||
br i1 %div.cond, label %body.2, label %merge | ||
; CHECK: DIVERGENT: br i1 %div.cond, | ||
|
||
body.2: | ||
%new.c = add i32 %loop.c, 1 | ||
; CHECK: DIVERGENT: %new.c = | ||
br label %merge | ||
|
||
merge: | ||
%merge.b = phi i32 [ %new.b, %body.1 ], [ %new.b, %body.2 ] | ||
; CHECK-NOT: DIVERGENT: %merge.b = | ||
%merge.c = phi i32 [ %loop.c, %body.1 ], [ %new.c, %body.2 ] | ||
; CHECK: DIVERGENT: %merge.c = | ||
br label %header | ||
|
||
end: | ||
ret void | ||
} | ||
|
||
define amdgpu_kernel void @no_loop_phi_divergence(i32 %a) #0 { | ||
entry: | ||
%tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
%uni.cond = icmp slt i32 %a, 0 | ||
; CHECK-NOT: DIVERGENT: %uni.cond = | ||
%div.cond = icmp slt i32 %tid, 0 | ||
; CHECK: DIVERGENT: %div.cond = | ||
br i1 %uni.cond, label %div.branch.block, label %merge | ||
; CHECK-NOT: DIVERGENT: br i1 %uni.cond, | ||
|
||
div.branch.block: | ||
br i1 %div.cond, label %div.block.1, label %div.block.2 | ||
; CHECK: DIVERGENT: br i1 %div.cond, | ||
|
||
div.block.1: | ||
br label %merge | ||
|
||
div.block.2: | ||
br label %merge | ||
|
||
merge: | ||
%uni.val = phi i32 [ 0, %entry ], [ 1, %div.block.1 ], [ 1, %div.block.2 ] | ||
; CHECK-NOT: DIVERGENT: %uni.val = | ||
%div.val = phi i32 [ 0, %entry ], [ 1, %div.block.1 ], [ 2, %div.block.2 ] | ||
; CHECK: DIVERGENT: %div.val = | ||
ret void | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need a test with temporal divergence? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the existing tests cover this. |
||
declare i32 @llvm.amdgcn.workitem.id.x() #0 | ||
|
||
attributes #0 = { nounwind readnone } |
Uh oh!
There was an error while loading. Please reload this page.