[NFC][LLVM][DSE] Extract large member functions out of class definition #162320
@llvm/pr-subscribers-llvm-transforms

Author: Rahul Joshi (jurahul)

Changes

Patch is 113.86 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/162320.diff

1 file affected:
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 7ad710ddfb273..33a2489a7ac63 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -987,71 +987,14 @@ struct DSEState {
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
- const LoopInfo &LI)
- : F(F), AA(AA), EA(DT, &LI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT),
- PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
- // Collect blocks with throwing instructions not modeled in MemorySSA and
- // alloc-like objects.
- unsigned PO = 0;
- for (BasicBlock *BB : post_order(&F)) {
- PostOrderNumbers[BB] = PO++;
- for (Instruction &I : *BB) {
- MemoryAccess *MA = MSSA.getMemoryAccess(&I);
- if (I.mayThrow() && !MA)
- ThrowingBlocks.insert(I.getParent());
-
- auto *MD = dyn_cast_or_null<MemoryDef>(MA);
- if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
- (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
- (EnableInitializesImprovement && hasInitializesAttr(&I))))
- MemDefs.push_back(MD);
- }
- }
-
- // Treat byval, inalloca or dead on return arguments the same as Allocas,
- // stores to them are dead at the end of the function.
- for (Argument &AI : F.args())
- if (AI.hasPassPointeeByValueCopyAttr() || AI.hasDeadOnReturnAttr())
- InvisibleToCallerAfterRet.insert({&AI, true});
-
- // Collect whether there is any irreducible control flow in the function.
- ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
-
- AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
- return isa<UnreachableInst>(E->getTerminator());
- });
- }
+ const LoopInfo &LI);
static void pushMemUses(MemoryAccess *Acc,
SmallVectorImpl<MemoryAccess *> &WorkList,
- SmallPtrSetImpl<MemoryAccess *> &Visited) {
- for (Use &U : Acc->uses()) {
- auto *MA = cast<MemoryAccess>(U.getUser());
- if (Visited.insert(MA).second)
- WorkList.push_back(MA);
- }
- };
+ SmallPtrSetImpl<MemoryAccess *> &Visited);
LocationSize strengthenLocationSize(const Instruction *I,
- LocationSize Size) const {
- if (auto *CB = dyn_cast<CallBase>(I)) {
- LibFunc F;
- if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
- (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
- // Use the precise location size specified by the 3rd argument
- // for determining KillingI overwrites DeadLoc if it is a memset_chk
- // instruction. memset_chk will write either the amount specified as 3rd
- // argument or the function will immediately abort and exit the program.
- // NOTE: AA may determine NoAlias if it can prove that the access size
- // is larger than the allocation size due to that being UB. To avoid
- // returning potentially invalid NoAlias results by AA, limit the use of
- // the precise location size to isOverwrite.
- if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
- return LocationSize::precise(Len->getZExtValue());
- }
- }
- return Size;
- }
+ LocationSize Size) const;
/// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
/// KillingI instruction) completely overwrites a store to the 'DeadLoc'
@@ -1065,1269 +1008,1450 @@ struct DSEState {
const Instruction *DeadI,
const MemoryLocation &KillingLoc,
const MemoryLocation &DeadLoc,
- int64_t &KillingOff, int64_t &DeadOff) {
- // AliasAnalysis does not always account for loops. Limit overwrite checks
- // to dependencies for which we can guarantee they are independent of any
- // loops they are in.
- if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
- return OW_Unknown;
+ int64_t &KillingOff, int64_t &DeadOff);
- LocationSize KillingLocSize =
- strengthenLocationSize(KillingI, KillingLoc.Size);
- const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
- const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
- const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
- const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
-
- // Check whether the killing store overwrites the whole object, in which
- // case the size/offset of the dead store does not matter.
- if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
- isIdentifiedObject(KillingUndObj)) {
- std::optional<TypeSize> KillingUndObjSize =
- getPointerSize(KillingUndObj, DL, TLI, &F);
- if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
- return OW_Complete;
- }
+ bool isInvisibleToCallerAfterRet(const Value *V);
+ bool isInvisibleToCallerOnUnwind(const Value *V);
- // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
- // get imprecise values here, though (except for unknown sizes).
- if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
- // In case no constant size is known, try to an IR values for the number
- // of bytes written and check if they match.
- const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
- const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
- if (KillingMemI && DeadMemI) {
- const Value *KillingV = KillingMemI->getLength();
- const Value *DeadV = DeadMemI->getLength();
- if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
- return OW_Complete;
- }
+ std::optional<MemoryLocation> getLocForWrite(Instruction *I) const;
- // Masked stores have imprecise locations, but we can reason about them
- // to some extent.
- return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
- }
+ // Returns a list of <MemoryLocation, bool> pairs written by I.
+ // The bool means whether the write is from Initializes attr.
+ SmallVector<std::pair<MemoryLocation, bool>, 1>
+ getLocForInst(Instruction *I, bool ConsiderInitializesAttr);
- const TypeSize KillingSize = KillingLocSize.getValue();
- const TypeSize DeadSize = DeadLoc.Size.getValue();
- // Bail on doing Size comparison which depends on AA for now
- // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
- const bool AnyScalable =
- DeadSize.isScalable() || KillingLocSize.isScalable();
+ /// Assuming this instruction has a dead analyzable write, can we delete
+ /// this instruction?
+ bool isRemovable(Instruction *I);
- if (AnyScalable)
- return OW_Unknown;
- // Query the alias information
- AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
-
- // If the start pointers are the same, we just have to compare sizes to see if
- // the killing store was larger than the dead store.
- if (AAR == AliasResult::MustAlias) {
- // Make sure that the KillingSize size is >= the DeadSize size.
- if (KillingSize >= DeadSize)
- return OW_Complete;
- }
+ /// Returns true if \p UseInst completely overwrites \p DefLoc
+ /// (stored by \p DefInst).
+ bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
+ Instruction *UseInst);
- // If we hit a partial alias we may have a full overwrite
- if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
- int32_t Off = AAR.getOffset();
- if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
- return OW_Complete;
- }
+ /// Returns true if \p Def is not read before returning from the function.
+ bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc);
- // If we can't resolve the same pointers to the same object, then we can't
- // analyze them at all.
- if (DeadUndObj != KillingUndObj) {
- // Non aliasing stores to different objects don't overlap. Note that
- // if the killing store is known to overwrite whole object (out of
- // bounds access overwrites whole object as well) then it is assumed to
- // completely overwrite any store to the same object even if they don't
- // actually alias (see next check).
- if (AAR == AliasResult::NoAlias)
- return OW_None;
- return OW_Unknown;
- }
+ /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
+ /// pair with the MemoryLocation terminated by \p I and a boolean flag
+ /// indicating whether \p I is a free-like call.
+ std::optional<std::pair<MemoryLocation, bool>>
+ getLocForTerminator(Instruction *I) const;
- // Okay, we have stores to two completely different pointers. Try to
- // decompose the pointer into a "base + constant_offset" form. If the base
- // pointers are equal, then we can reason about the two stores.
- DeadOff = 0;
- KillingOff = 0;
- const Value *DeadBasePtr =
- GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
- const Value *KillingBasePtr =
- GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
-
- // If the base pointers still differ, we have two completely different
- // stores.
- if (DeadBasePtr != KillingBasePtr)
- return OW_Unknown;
+ /// Returns true if \p I is a memory terminator instruction like
+ /// llvm.lifetime.end or free.
+ bool isMemTerminatorInst(Instruction *I) const {
+ auto *CB = dyn_cast<CallBase>(I);
+ return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
+ getFreedOperand(CB, &TLI) != nullptr);
+ }
- // The killing access completely overlaps the dead store if and only if
- // both start and end of the dead one is "inside" the killing one:
- // |<->|--dead--|<->|
- // |-----killing------|
- // Accesses may overlap if and only if start of one of them is "inside"
- // another one:
- // |<->|--dead--|<-------->|
- // |-------killing--------|
- // OR
- // |-------dead-------|
- // |<->|---killing---|<----->|
- //
- // We have to be careful here as *Off is signed while *.Size is unsigned.
-
- // Check if the dead access starts "not before" the killing one.
- if (DeadOff >= KillingOff) {
- // If the dead access ends "not after" the killing access then the
- // dead one is completely overwritten by the killing one.
- if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
- return OW_Complete;
- // If start of the dead access is "before" end of the killing access
- // then accesses overlap.
- else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
- return OW_MaybePartial;
- }
- // If start of the killing access is "before" end of the dead access then
- // accesses overlap.
- else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
- return OW_MaybePartial;
- }
+ /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
+ /// instruction \p AccessI.
+ bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
+ Instruction *MaybeTerm);
- // Can reach here only if accesses are known not to overlap.
- return OW_None;
- }
+ // Returns true if \p Use may read from \p DefLoc.
+ bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst);
- bool isInvisibleToCallerAfterRet(const Value *V) {
- if (isa<AllocaInst>(V))
- return true;
+ /// Returns true if a dependency between \p Current and \p KillingDef is
+ /// guaranteed to be loop invariant for the loops that they are in. Either
+ /// because they are known to be in the same block, in the same loop level or
+ /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
+ /// during execution of the containing function.
+ bool isGuaranteedLoopIndependent(const Instruction *Current,
+ const Instruction *KillingDef,
+ const MemoryLocation &CurrentLoc);
- auto I = InvisibleToCallerAfterRet.insert({V, false});
- if (I.second && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V))
- I.first->second = capturesNothing(PointerMayBeCaptured(
- V, /*ReturnCaptures=*/true, CaptureComponents::Provenance));
- return I.first->second;
- }
+ /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
+ /// loop. In particular, this guarantees that it only references a single
+ /// MemoryLocation during execution of the containing function.
+ bool isGuaranteedLoopInvariant(const Value *Ptr);
- bool isInvisibleToCallerOnUnwind(const Value *V) {
- bool RequiresNoCaptureBeforeUnwind;
- if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
- return false;
- if (!RequiresNoCaptureBeforeUnwind)
- return true;
+ // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
+ // with no read access between them or on any other path to a function exit
+ // block if \p KillingLoc is not accessible after the function returns. If
+ // there is no such MemoryDef, return std::nullopt. The returned value may not
+ // (completely) overwrite \p KillingLoc. Currently we bail out when we
+ // encounter an aliasing MemoryUse (read).
+ std::optional<MemoryAccess *>
+ getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
+ const MemoryLocation &KillingLoc, const Value *KillingUndObj,
+ unsigned &ScanLimit, unsigned &WalkerStepLimit,
+ bool IsMemTerm, unsigned &PartialLimit,
+ bool IsInitializesAttrMemLoc);
+
+ /// Delete dead memory defs and recursively add their operands to ToRemove if
+ /// they became dead.
+ void
+ deleteDeadInstruction(Instruction *SI,
+ SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr);
+
+ // Check for any extra throws between \p KillingI and \p DeadI that block
+ // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
+ // MemoryDef that may throw are handled during the walk from one def to the
+ // next.
+ bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
+ const Value *KillingUndObj);
+
+ // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
+ // instructions act as barriers:
+ // * A memory instruction that may throw and \p KillingI accesses a non-stack
+ // object.
+ // * Atomic stores stronger that monotonic.
+ bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI);
+
+ /// Eliminate writes to objects that are not visible in the caller and are not
+ /// accessed before returning from the function.
+ bool eliminateDeadWritesAtEndOfFunction();
+
+ /// If we have a zero initializing memset following a call to malloc,
+ /// try folding it into a call to calloc.
+ bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO);
+
+ // Check if there is a dominating condition, that implies that the value
+ // being stored in a ptr is already present in the ptr.
+ bool dominatingConditionImpliesValue(MemoryDef *Def);
+
+ /// \returns true if \p Def is a no-op store, either because it
+ /// directly stores back a loaded value or stores zero to a calloced object.
+ bool storeIsNoop(MemoryDef *Def, const Value *DefUO);
- auto I = CapturedBeforeReturn.insert({V, true});
- if (I.second)
- // NOTE: This could be made more precise by PointerMayBeCapturedBefore
- // with the killing MemoryDef. But we refrain from doing so for now to
- // limit compile-time and this does not cause any changes to the number
- // of stores removed on a large test set in practice.
- I.first->second = capturesAnything(PointerMayBeCaptured(
- V, /*ReturnCaptures=*/false, CaptureComponents::Provenance));
- return !I.first->second;
+ bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL);
+
+ /// Eliminates writes to locations where the value that is being written
+ /// is already stored at the same location.
+ bool eliminateRedundantStoresOfExistingValues();
+
+ // Return the locations written by the initializes attribute.
+ // Note that this function considers:
+ // 1. Unwind edge: use "initializes" attribute only if the callee has
+ // "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
+ // or the argument is invisible to caller on unwind. That is, we don't
+ // perform incorrect DSE on unwind edges in the current function.
+ // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
+ // the intersected range list of their "initializes" attributes.
+ SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);
+
+ // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
+ // killed by `KillingLocWrapper.MemDef`. Return whether
+ // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
+ std::pair<bool, bool>
+ eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);
+
+ // Try to eliminate dead defs killed by `KillingDefWrapper` and return the
+ // change state: whether make any change.
+ bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
+};
+} // namespace
+
+DSEState::DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
+ DominatorTree &DT, PostDominatorTree &PDT,
+ const TargetLibraryInfo &TLI, const LoopInfo &LI)
+ : F(F), AA(AA), EA(DT, &LI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT), PDT(PDT),
+ TLI(TLI), DL(F.getDataLayout()), LI(LI) {
+ // Collect blocks with throwing instructions not modeled in MemorySSA and
+ // alloc-like objects.
+ unsigned PO = 0;
+ for (BasicBlock *BB : post_order(&F)) {
+ PostOrderNumbers[BB] = PO++;
+ for (Instruction &I : *BB) {
+ MemoryAccess *MA = MSSA.getMemoryAccess(&I);
+ if (I.mayThrow() && !MA)
+ ThrowingBlocks.insert(I.getParent());
+
+ auto *MD = dyn_cast_or_null<MemoryDef>(MA);
+ if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
+ (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
+ (EnableInitializesImprovement && hasInitializesAttr(&I))))
+ MemDefs.push_back(MD);
+ }
}
- std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
- if (!I->mayWriteToMemory())
- return std::nullopt;
+ // Treat byval, inalloca or dead on return arguments the same as Allocas,
+ // stores to them are dead at the end of the function.
+ for (Argument &AI : F.args())
+ if (AI.hasPassPointeeByValueCopyAttr() || AI.hasDeadOnReturnAttr())
+ InvisibleToCallerAfterRet.insert({&AI, true});
- if (auto *CB = dyn_cast<CallBase>(I))
- return MemoryLocation::getForDest(CB, TLI);
+ // Collect whether there is any irreducible control flow in the function.
+ ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
- return MemoryLocation::getOrNone(I);
+ AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
+ return isa<UnreachableInst>(E->getTerminator());
+ });
+}
+
+void DSEState::pushMemUses(MemoryAccess *Acc,
+ SmallVectorImpl<MemoryAccess *> &WorkList,
+ SmallPtrSetImpl<MemoryAccess *> &Visited) {
+ for (Use &U : Acc->uses()) {
+ auto *MA = cast<MemoryAccess>(U.getUser());
+ if (Visited.insert(MA).second)
+ WorkList.push_back(MA);
}
+}
- // Returns a list of <MemoryLocation, bool> pairs written by I.
- // The bool means whether the write is from Initializes attr.
- SmallVector<std::pair<MemoryLocation, bool>, 1>
- getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
- SmallVector<std::pair<MemoryLocation, bool>, 1> Locations;
- if (isMemTerminatorInst(I)) {
- if (auto Loc = getLocForTerminator(I))
- Locations.push_back(std::make_pair(Loc->first, false));
- return Locations;
+LocationSize DSEState::strengthenLocationSize(const In...
[truncated]
I wanted to see what your take is here. I find that large inlined functions hinder readability by getting in the way of forming an overall picture of a class (by looking at the class declaration). Having them separated out keeps the impl details out of the way when trying to get that high-level overview, and it also eliminates the added indentation when the functions are defined inline.
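For readers skimming the thread, here is a minimal sketch of the pattern being proposed (the class and members are hypothetical, not taken from the PR): member functions are declared inside a file-local class definition and defined out-of-line after the anonymous namespace, which is what the diff above does for DSEState.

```cpp
#include <vector>

namespace {
// File-local class: only declarations appear here, so the "interface"
// reads without implementation details or extra indentation.
struct Worklist {
  std::vector<int> Items;

  /// Queues \p V; returns true if anything was actually queued.
  /// (With the inline style, the body below would appear right here.)
  bool enqueue(int V);
};
} // namespace

// Out-of-line definition, later in the same .cpp file.
bool Worklist::enqueue(int V) {
  if (V < 0)
    return false;
  Items.push_back(V);
  return true;
}

int main() {
  Worklist W;
  return W.enqueue(42) ? 0 : 1;
}
```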
Best to view the diff with whitespace changes disabled...
I'm personally OK with functions defined as part of the class definition. I don't want to touch two places when I change function signatures.
Agree with @kazutakahirata. Also, what you did here duplicates the doc comments in two places. And if you don't duplicate them, then you have to look for them in a different place.
Thanks. What about the concern of not being able to get a high-level view easily, without being distracted by impl details? That was my main motivation for doing this, but I am not too tied to it. I guess for interface classes (which are in header files) we already follow this and keep the impl separate in .cpp files; this was applying the same idea to file-local classes (impl classes).
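For comparison, the header/.cpp convention for interface classes mentioned above looks roughly like this (file and class names are hypothetical; the two files are shown concatenated so the snippet stands alone):

```cpp
// Foo.h (hypothetical): readers of the header see only the interface.
class Foo {
public:
  /// Returns twice the value of \p X.
  int twice(int X) const;
};

// Foo.cpp (hypothetical): the implementation stays out of the header.
int Foo::twice(int X) const { return 2 * X; }
```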
Closing, I think I got my answer :) |