Skip to content

Commit fbc4a5f

Browse files
address reviewer's comments
1 parent 4715789 commit fbc4a5f

File tree

2 files changed

+99
-24
lines changed

2 files changed

+99
-24
lines changed

llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,8 +1636,10 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
16361636
// %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ]
16371637
// ...
16381638
// %identicalPhi.next = select %cmp, %val, %identicalPhi
1639-
// %1 = select %cmp2, %identicalPhi, float %phi
1639+
// (or select %cmp, %identicalPhi, %val)
1640+
// %1 = select %cmp2, %identicalPhi, %phi
16401641
// %phi.next = select %cmp, %val, %1
1642+
// (or select %cmp, %1, %val)
16411643
//
16421644
// Prove that %phi and %identicalPhi are the same by induction:
16431645
//
@@ -1646,43 +1648,58 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
16461648
// Suppose %phi and %identicalPhi are equal at iteration i.
16471649
// We look at their values at iteration i+1 which are %phi.next and
16481650
// %identicalPhi.next. They would have become different only when %cmp is
1649-
// false and the corresponding values %1 and %identicalPhi differ.
1651+
// false and the corresponding values %1 and %identicalPhi differ
1652+
// (similar reason for the other "or" case in the bracket).
16501653
//
16511654
// The only condition when %1 and %identicalPh could differ is when %cmp2
16521655
// is false and %1 is %phi, which contradicts our inductive hypothesis
16531656
// that %phi and %identicalPhi are equal. Thus %phi and %identicalPhi are
16541657
// always equal at iteration i+1.
16551658

1656-
if (PN.getNumIncomingValues() == 2 && PN.getNumUses() == 1) {
1657-
unsigned diffVals = 0;
1658-
unsigned diffValIdx = 0;
1659+
if (PN.getNumIncomingValues() == 2) {
1660+
unsigned DiffVals = 0;
1661+
BasicBlock *DiffValBB = nullptr;
16591662
// Check that only the backedge incoming value is different.
16601663
for (unsigned i = 0; i < 2; i++) {
1661-
if (PN.getIncomingValue(i) != IdenticalPN.getIncomingValue(i)) {
1662-
diffVals++;
1663-
diffValIdx = i;
1664+
BasicBlock *PredBB = PN.getIncomingBlock(i);
1665+
if (PN.getIncomingValueForBlock(PredBB) !=
1666+
IdenticalPN.getIncomingValueForBlock(PredBB)) {
1667+
DiffVals++;
1668+
DiffValBB = PredBB;
16641669
}
16651670
}
16661671
BasicBlock *CurBB = PN.getParent();
1667-
if (diffVals == 2 || PN.getIncomingBlock(diffValIdx) != CurBB)
1672+
if (DiffVals == 2 || DiffValBB != CurBB)
16681673
continue;
16691674
// Now check that the backedge incoming values are two select
1670-
// instructions that are in the same BB, and have the same condition,
1671-
// true value.
1672-
auto *Val = PN.getIncomingValue(diffValIdx);
1673-
auto *IdenticalVal = IdenticalPN.getIncomingValue(diffValIdx);
1675+
// instructions that are in the same BB, and have the same condition.
1676+
// Either their true values are the same, or their false values are
1677+
// the same.
1678+
auto *Val = PN.getIncomingValueForBlock(DiffValBB);
1679+
auto *IdenticalVal = IdenticalPN.getIncomingValueForBlock(DiffValBB);
16741680
if (!isa<SelectInst>(Val) || !isa<SelectInst>(IdenticalVal))
16751681
continue;
16761682

16771683
auto *SI = cast<SelectInst>(Val);
16781684
auto *IdenticalSI = cast<SelectInst>(IdenticalVal);
1679-
if (SI->getParent() != CurBB || IdenticalSI->getParent() != CurBB)
1685+
if (SI->getParent() != CurBB || IdenticalSI->getParent() != CurBB ||
1686+
SI->getNumUses() != 1)
16801687
continue;
16811688
if (SI->getCondition() != IdenticalSI->getCondition() ||
1682-
SI->getTrueValue() != IdenticalSI->getTrueValue())
1689+
(SI->getTrueValue() != IdenticalSI->getTrueValue() &&
1690+
SI->getFalseValue() != IdenticalSI->getFalseValue()))
16831691
continue;
1692+
Value *SIOtherVal = nullptr;
1693+
Value *IdenticalSIOtherVal = nullptr;
1694+
if (SI->getTrueValue() == IdenticalSI->getTrueValue()) {
1695+
SIOtherVal = SI->getFalseValue();
1696+
IdenticalSIOtherVal = IdenticalSI->getFalseValue();
1697+
} else {
1698+
SIOtherVal = SI->getTrueValue();
1699+
IdenticalSIOtherVal = IdenticalSI->getTrueValue();
1700+
}
16841701

1685-
// Now check that the false values, i.e., %1 and %identicalPhi,
1702+
// Now check that the other values in select, i.e., %1 and %identicalPhi,
16861703
// are essentially the same value within the same BB.
16871704
auto SameSelAndPhi = [&](SelectInst *SI, PHINode *IdenticalPN,
16881705
PHINode *PN) {
@@ -1691,15 +1708,13 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
16911708
}
16921709
return false;
16931710
};
1694-
auto *FalseVal = SI->getFalseValue();
1695-
auto *IdenticalSIFalseVal =
1696-
dyn_cast<PHINode>(IdenticalSI->getFalseValue());
1697-
if (!isa<SelectInst>(FalseVal) || !IdenticalSIFalseVal ||
1698-
IdenticalSIFalseVal != &IdenticalPN)
1711+
if (!isa<SelectInst>(SIOtherVal) || !isa<PHINode>(IdenticalSIOtherVal))
1712+
continue;
1713+
if (cast<PHINode>(IdenticalSIOtherVal) != &IdenticalPN)
16991714
continue;
1700-
auto *FalseValSI = cast<SelectInst>(FalseVal);
1701-
if (FalseValSI->getParent() != CurBB ||
1702-
!SameSelAndPhi(FalseValSI, &IdenticalPN, &PN))
1715+
auto *SIOtherValAsSel = cast<SelectInst>(SIOtherVal);
1716+
if (SIOtherValAsSel->getParent() != CurBB ||
1717+
!SameSelAndPhi(SIOtherValAsSel, &IdenticalPN, &PN))
17031718
continue;
17041719

17051720
++NumPHICSEs;

llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,63 @@ exit:
5959
store float %vl.1.lcssa, ptr @A
6060
ret void
6161
}
62+
63+
; %phi.to.remove acts the same as %v1, and can be eliminated with PHI CSE.
64+
; The difference from enhanced_phi_cse() is that the true and false values in
65+
; %phi.to.remove.next and %v1.1 are swapped.
66+
define void @enhanced_phi_cse_2(ptr %m, ptr %n, i32 %count) {
67+
; CHECK-LABEL: @enhanced_phi_cse_2(
68+
; CHECK-NEXT: entry:
69+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
70+
; CHECK: for.body:
71+
; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
72+
; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
73+
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
74+
; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
75+
; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
76+
; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
77+
; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
78+
; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
79+
; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
80+
; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
81+
; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
82+
; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]]
83+
; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
84+
; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
85+
; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
86+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
87+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
88+
; CHECK: exit:
89+
; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
90+
; CHECK-NEXT: ret void
91+
;
92+
entry:
93+
br label %for.body
94+
95+
for.body: ; preds = %entry, %for.body
96+
%v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
97+
%v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
98+
%phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
99+
%i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
100+
%q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
101+
%c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
102+
%q.load = load float, ptr %q
103+
%c.load = load float, ptr %c
104+
%sub = fsub float %q.load, %c.load
105+
%cmp1 = fcmp olt float %sub, %v0
106+
%v0.1 = select i1 %cmp1, float %sub, float %v0
107+
%same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
108+
%cmp2 = fcmp ogt float %sub, %same.as.v1
109+
%v1.1 = select i1 %cmp2, float %v1, float %sub
110+
%phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub
111+
%inc.i = add nuw nsw i32 %i, 1
112+
%q.next = getelementptr inbounds i8, ptr %q, i64 4
113+
%c.next = getelementptr inbounds i8, ptr %c, i64 4
114+
%exitcond = icmp eq i32 %inc.i, %count
115+
br i1 %exitcond, label %exit, label %for.body
116+
117+
exit:
118+
%vl.1.lcssa = phi float [ %v1.1, %for.body ]
119+
store float %vl.1.lcssa, ptr @A
120+
ret void
121+
}

0 commit comments

Comments
 (0)