From 9cf9ee792c9873f243e0cc217913c7a9afa4fe0a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 25 Nov 2021 14:43:12 -0600 Subject: [PATCH 001/130] extractor WIP --- .../llvm/Transforms/Utils/CodeExtractor.h | 32 +- llvm/lib/Transforms/IPO/IROutliner.cpp | 11 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1510 ++++++++++++++--- .../Transforms/IROutliner/outlining-calls.ll | 1 + 4 files changed, 1334 insertions(+), 220 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index f08173e45a5bf..d3987f94cec54 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include namespace llvm { @@ -109,6 +110,13 @@ class CodeExtractorAnalysisCache { // label, if non-empty, otherwise "extracted". std::string Suffix; + // bool DeleteOldBlocks; + + void analyzeBeforeExtraction(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs, BlockFrequency &EntryFreq,DenseMap &ExitWeights, SmallPtrSet &ExitBlocks); + + + void prepareForExtraction(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs); + public: /// Create a code extractor for a sequence of blocks. /// @@ -141,7 +149,7 @@ class CodeExtractorAnalysisCache { /// /// Returns zero when called on a CodeExtractor instance where isEligible /// returns false. - Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC); + Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, bool KeepOldBlocks = false); /// Perform the extraction, returning the new function and providing an /// interface to see what was categorized as inputs and outputs. 
@@ -155,7 +163,13 @@ class CodeExtractorAnalysisCache { /// \returns zero when called on a CodeExtractor instance where isEligible /// returns false. Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - ValueSet &Inputs, ValueSet &Outputs); + ValueSet &Inputs, ValueSet &Outputs, bool KeepOldBlocks = false); + + Function *extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, + ValueSet &Inputs, ValueSet &Outputs, + const BlockFrequency& EntryFreq, + const DenseMap &ExitWeights, const SmallPtrSet &ExitBlocks, + const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit, Function *newFunction ); /// Verify that assumption cache isn't stale after a region is extracted. /// Returns true when verifier finds errors. AssumptionCache is passed as @@ -233,8 +247,16 @@ class CodeExtractorAnalysisCache { Function *constructFunction(const ValueSet &inputs, const ValueSet &outputs, BasicBlock *header, - BasicBlock *newRootNode, BasicBlock *newHeader, - Function *oldFunction, Module *M); + BasicBlock *&newRootNode, BasicBlock *newHeader, + Function *oldFunction, Module *M, bool KeepOldBlocks, ValueToValueMapTy &VMap); + + Function *constructFunction2(const ValueSet &inputs, + const ValueSet &outputs, + BasicBlock *header, + // BasicBlock *&newRootNode, BasicBlock *newHeader, + Function *oldFunction, Module *M + //, bool KeepOldBlocks, ValueToValueMapTy &VMap + ); void moveCodeToFunction(Function *newFunction); @@ -245,7 +267,7 @@ class CodeExtractorAnalysisCache { CallInst *emitCallAndSwitchStatement(Function *newFunction, BasicBlock *newHeader, - ValueSet &inputs, ValueSet &outputs); + ValueSet &inputs, ValueSet &outputs,bool KeepOldBlocks , ValueToValueMapTy &VMap); }; } // end namespace llvm diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index b8a314c54f18c..d3edf55d7f445 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -38,6 
+38,8 @@ using namespace IRSimilarity; // matching and outlining. extern cl::opt DisableBranches; + + // Set to true if the user wants the ir outliner to run on linkonceodr linkage // functions. This is false by default because the linker can dedupe linkonceodr // functions. Since the outliner is confined to a single module (modulo LTO), @@ -55,6 +57,13 @@ static cl::opt NoCostModel( cl::desc("Debug option to outline greedily, without restriction that " "calculated benefit outweighs cost")); + +static +cl::opt +KeepOldBlocks("iroutline-copy", cl::init(false), + cl::Hidden, + cl::desc("Copy instead of moving instructions from original function.")); + /// The OutlinableGroup holds all the overarching information for outlining /// a set of regions that are structurally similar to one another, such as the /// types of the overall function, the output blocks, the sets of stores needed @@ -1903,7 +1912,7 @@ bool IROutliner::extractSection(OutlinableRegion &Region) { Function *OrigF = Region.StartBB->getParent(); CodeExtractorAnalysisCache CEAC(*OrigF); Region.ExtractedFunction = - Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs); + Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs, KeepOldBlocks); // If the extraction was successful, find the BasicBlock, and reassign the // OutlinableRegion blocks diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 96aff563aa9b6..411e2a38206c2 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -62,6 +62,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/Cloning.h" #include #include #include @@ -228,10 +229,11 @@ buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, continue; } +#if 0 // All blocks other than the first must not have predecessors outside of // the subgraph which is being extracted. 
for (auto *PBB : predecessors(BB)) - if (!Result.count(PBB)) { + if (!Result.count(PBB) && DT->isReachableFromEntry(PBB) { LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from " "outside the region except for the first block!\n" << "Problematic source BB: " << BB->getName() << "\n" @@ -239,6 +241,7 @@ buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, << "\n"); return {}; } +#endif } return Result; @@ -248,7 +251,7 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, bool AllowVarArgs, bool AllowAlloca, - std::string Suffix) + std::string Suffix ) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs), Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), @@ -257,7 +260,7 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, - std::string Suffix) + std::string Suffix ) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AC(AC), AllowVarArgs(false), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, @@ -495,6 +498,11 @@ CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC, ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const { + if (Blocks.empty()) { + // ?? + return; + } + Function *Func = (*Blocks.begin())->getParent(); ExitBlock = getCommonExitBlock(Blocks); @@ -650,6 +658,9 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. 
for (Instruction &II : *BB) { + // Assume should not be the reason to introduce a parameter for the extracted function. + if (isa(II)) continue; + for (auto &OI : II.operands()) { Value *V = OI; if (!SinkCands.count(V) && definedInCaller(Blocks, V)) @@ -814,10 +825,10 @@ void CodeExtractor::splitReturnBlocks() { Function *CodeExtractor::constructFunction(const ValueSet &inputs, const ValueSet &outputs, BasicBlock *header, - BasicBlock *newRootNode, + BasicBlock *&newRootNode, BasicBlock *newHeader, Function *oldFunction, - Module *M) { + Module *M, bool KeepOldBlocks, ValueToValueMapTy &VMap) { LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); @@ -830,18 +841,18 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, } std::vector paramTy; - + SmallVector VMapArg; // Add the types of the input values to the function's argument list for (Value *value : inputs) { LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); - paramTy.push_back(value->getType()); + paramTy.push_back(value->getType()); VMapArg.push_back(value); } // Add the types of the output values to the function's argument list. for (Value *output : outputs) { - LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); + LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); if (AggregateArgs) - paramTy.push_back(output->getType()); + paramTy.push_back(output->getType()); else paramTy.push_back(PointerType::getUnqual(output->getType())); } @@ -871,6 +882,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, Function *newFunction = Function::Create( funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), oldFunction->getName() + "." + SuffixToUse, M); + // If the old function is no-throw, so is the new one. 
if (oldFunction->doesNotThrow()) newFunction->setDoesNotThrow(); @@ -984,7 +996,14 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, newFunction->addFnAttr(Attr); } - newFunction->getBasicBlockList().push_back(newRootNode); + if (newRootNode) { + newFunction->getBasicBlockList().push_back(newRootNode); + } else { + newRootNode = BasicBlock::Create(newFunction->getContext(), "newFuncRoot", newFunction); + // auto BranchI = BranchInst::Create(newRootNode, newRootNode); // FIXME + } + + // Create an iterator to name all of the arguments we inserted. Function::arg_iterator AI = newFunction->arg_begin(); @@ -1005,11 +1024,15 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, } else RewriteVal = &*AI++; - std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User *use : Users) - if (Instruction *inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); + if (KeepOldBlocks) { + VMap[ inputs[i]] = RewriteVal ; + } else { + std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); + for (User* use : Users) + if (Instruction* inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } } // Set names for input and output arguments. @@ -1025,7 +1048,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, // within the new function. This must be done before we lose track of which // blocks were originally in the code region. std::vector Users(header->user_begin(), header->user_end()); - for (auto &U : Users) + for (auto &U : Users) // FIXME: KeepOldBlocks? 
// The BasicBlock which contains the branch is not in the region // modify the branch target to a new block if (Instruction *I = dyn_cast(U)) @@ -1036,6 +1059,187 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, return newFunction; } + +Function *CodeExtractor::constructFunction2(const ValueSet &inputs, + const ValueSet &outputs, + BasicBlock *header, + // BasicBlock *&newRootNode, BasicBlock *newHeader, + Function *oldFunction, Module *M//, bool KeepOldBlocks, ValueToValueMapTy &VMap +){ + LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); + LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); + + // This function returns unsigned, outputs will go back by reference. + switch (NumExitBlocks) { + case 0: + case 1: RetTy = Type::getVoidTy(header->getContext()); break; + case 2: RetTy = Type::getInt1Ty(header->getContext()); break; + default: RetTy = Type::getInt16Ty(header->getContext()); break; + } + + std::vector paramTy; + SmallVector VMapArg; + // Add the types of the input values to the function's argument list + for (Value *value : inputs) { + LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); + paramTy.push_back(value->getType()); VMapArg.push_back(value); + } + + // Add the types of the output values to the function's argument list. 
+ for (Value *output : outputs) { + LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); + if (AggregateArgs) + paramTy.push_back(output->getType()); + else + paramTy.push_back(PointerType::getUnqual(output->getType())); + } + + LLVM_DEBUG({ + dbgs() << "Function type: " << *RetTy << " f("; + for (Type *i : paramTy) + dbgs() << *i << ", "; + dbgs() << ")\n"; + }); + + StructType *StructTy = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + StructTy = StructType::get(M->getContext(), paramTy); + paramTy.clear(); + paramTy.push_back(PointerType::getUnqual(StructTy)); + } + FunctionType *funcType = + FunctionType::get(RetTy, paramTy, + AllowVarArgs && oldFunction->isVarArg()); + + std::string SuffixToUse = + Suffix.empty() + ? (header->getName().empty() ? "extracted" : header->getName().str()) + : Suffix; + // Create the new function + Function *newFunction = Function::Create( + funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), + oldFunction->getName() + "." + SuffixToUse, M); + + // If the old function is no-throw, so is the new one. + if (oldFunction->doesNotThrow()) + newFunction->setDoesNotThrow(); + + // Inherit the uwtable attribute if we need to. + if (oldFunction->hasUWTable()) + newFunction->setHasUWTable(); + + // Inherit all of the target dependent attributes and white-listed + // target independent attributes. + // (e.g. If the extracted region contains a call to an x86.sse + // instruction we need to make sure that the extracted region has the + // "target-features" attribute allowing it to be lowered. + // FIXME: This should be changed to check to see if a specific + // attribute can not be inherited. + for (const auto &Attr : oldFunction->getAttributes().getFnAttrs()) { + if (Attr.isStringAttribute()) { + if (Attr.getKindAsString() == "thunk") + continue; + } else + switch (Attr.getKindAsEnum()) { + // Those attributes cannot be propagated safely. 
Explicitly list them + // here so we get a warning if new attributes are added. This list also + // includes non-function attributes. + case Attribute::Alignment: + case Attribute::AllocSize: + case Attribute::ArgMemOnly: + case Attribute::Builtin: + case Attribute::ByVal: + case Attribute::Convergent: + case Attribute::Dereferenceable: + case Attribute::DereferenceableOrNull: + case Attribute::ElementType: + case Attribute::InAlloca: + case Attribute::InReg: + case Attribute::InaccessibleMemOnly: + case Attribute::InaccessibleMemOrArgMemOnly: + case Attribute::JumpTable: + case Attribute::Naked: + case Attribute::Nest: + case Attribute::NoAlias: + case Attribute::NoBuiltin: + case Attribute::NoCapture: + case Attribute::NoMerge: + case Attribute::NoReturn: + case Attribute::NoSync: + case Attribute::NoUndef: + case Attribute::None: + case Attribute::NonNull: + case Attribute::Preallocated: + case Attribute::ReadNone: + case Attribute::ReadOnly: + case Attribute::Returned: + case Attribute::ReturnsTwice: + case Attribute::SExt: + case Attribute::Speculatable: + case Attribute::StackAlignment: + case Attribute::StructRet: + case Attribute::SwiftError: + case Attribute::SwiftSelf: + case Attribute::SwiftAsync: + case Attribute::WillReturn: + case Attribute::WriteOnly: + case Attribute::ZExt: + case Attribute::ImmArg: + case Attribute::ByRef: + case Attribute::EndAttrKinds: + case Attribute::EmptyKey: + case Attribute::TombstoneKey: + continue; + // Those attributes should be safe to propagate to the extracted function. 
+ case Attribute::AlwaysInline: + case Attribute::Cold: + case Attribute::DisableSanitizerInstrumentation: + case Attribute::Hot: + case Attribute::NoRecurse: + case Attribute::InlineHint: + case Attribute::MinSize: + case Attribute::NoCallback: + case Attribute::NoDuplicate: + case Attribute::NoFree: + case Attribute::NoImplicitFloat: + case Attribute::NoInline: + case Attribute::NonLazyBind: + case Attribute::NoRedZone: + case Attribute::NoUnwind: + case Attribute::NoSanitizeCoverage: + case Attribute::NullPointerIsValid: + case Attribute::OptForFuzzing: + case Attribute::OptimizeNone: + case Attribute::OptimizeForSize: + case Attribute::SafeStack: + case Attribute::ShadowCallStack: + case Attribute::SanitizeAddress: + case Attribute::SanitizeMemory: + case Attribute::SanitizeThread: + case Attribute::SanitizeHWAddress: + case Attribute::SanitizeMemTag: + case Attribute::SpeculativeLoadHardening: + case Attribute::StackProtect: + case Attribute::StackProtectReq: + case Attribute::StackProtectStrong: + case Attribute::StrictFP: + case Attribute::UWTable: + case Attribute::VScaleRange: + case Attribute::NoCfCheck: + case Attribute::MustProgress: + case Attribute::NoProfile: + break; + } + + newFunction->addFnAttr(Attr); + } + + + + return newFunction; +} + + /// Erase lifetime.start markers which reference inputs to the extraction /// region, and insert the referenced memory into \p LifetimesStart. 
/// @@ -1123,7 +1327,7 @@ static void insertLifetimeMarkersSurroundingCall( CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, ValueSet &inputs, - ValueSet &outputs) { + ValueSet &outputs, bool KeepOldBlocks, ValueToValueMapTy &VMap) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs std::vector params, StructValues, ReloadOutputs, Reloads; @@ -1133,6 +1337,22 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, const DataLayout &DL = M->getDataLayout(); CallInst *call = nullptr; + BasicBlock *AllocaBlock ; + if (KeepOldBlocks) { + AllocaBlock = &newFunction->front(); + } else { + AllocaBlock = &codeReplacer->getParent()->front(); + } + +#if 0 + auto NewAlloca = [&](Type *Ty, unsigned AddrSpace, Value *ArraySize, + const Twine &Name) { + if (!KeepOldBlocks) + return new AllocaInst(Ty, AddrSpace,ArraySize, Name, &codeReplacer->getParent()->front().front()); + return new AllocaInst(Ty, AddrSpace, ArraySize, Name, &newFunction->front().front()); + }; +#endif + // Add inputs as params, or to be filled into the struct unsigned ArgNo = 0; SmallVector SwiftErrorArgs; @@ -1153,9 +1373,12 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, StructValues.push_back(output); } else { AllocaInst *alloca = + // NewAlloca(output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc"); +#if 1 new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc", - &codeReplacer->getParent()->front().front()); + &AllocaBlock->front()); +#endif ReloadOutputs.push_back(alloca); params.push_back(alloca); } @@ -1170,9 +1393,12 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, // Allocate a struct at the beginning of this function StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); + // Struct = 
NewAlloca(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg"); +#if 1 Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg", - &codeReplacer->getParent()->front().front()); + &AllocaBlock->front()); +#endif params.push_back(Struct); for (unsigned i = 0, e = inputs.size(); i != e; ++i) { @@ -1187,8 +1413,10 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, } // Emit the call to the function - call = CallInst::Create(newFunction, params, - NumExitBlocks > 1 ? "targetBlock" : ""); + call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); + + + // Add debug location to the new call, if the original function has debug // info. In that case, the terminator of the entry block of the extracted // function contains the first debug location of the extracted function, @@ -1263,6 +1491,7 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", newFunction); + VMap[OldTarget] = NewTarget; unsigned SuccNum = switchVal++; Value *brVal = nullptr; @@ -1286,6 +1515,7 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, OldTarget); } + //if (!KeepOldBlocks) for (BasicBlock *Block : Blocks) { Instruction *TI = Block->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { @@ -1297,7 +1527,7 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, assert(NewTarget && "Unknown target block!"); // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); + TI->setSuccessor(i, NewTarget); } } @@ -1401,17 +1631,20 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); + + + auto newFuncIt = newFunction->front().getIterator(); for 
(BasicBlock *Block : Blocks) { - // Delete the basic block from the old function, and the list of blocks - oldBlocks.remove(Block); - - // Insert this basic block into the new function - // Insert the original blocks after the entry block created - // for the new function. The entry block may be followed - // by a set of exit blocks at this point, but these exit - // blocks better be placed at the end of the new function. - newFuncIt = newBlocks.insertAfter(newFuncIt, Block); + // Delete the basic block from the old function, and the list of blocks + oldBlocks.remove(Block); + + // Insert this basic block into the new function + // Insert the original blocks after the entry block created + // for the new function. The entry block may be followed + // by a set of exit blocks at this point, but these exit + // blocks better be placed at the end of the new function. + newFuncIt = newBlocks.insertAfter(newFuncIt, Block); } } @@ -1586,220 +1819,388 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, } Function * -CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { +CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, bool KeepOldBlocks ) { ValueSet Inputs, Outputs; - return extractCodeRegion(CEAC, Inputs, Outputs); + return extractCodeRegion(CEAC, Inputs, Outputs,KeepOldBlocks); +} + + + + +void CodeExtractor::analyzeBeforeExtraction( + const CodeExtractorAnalysisCache& CEAC, ValueSet& inputs, ValueSet& outputs, + BlockFrequency& EntryFreq, + DenseMap &ExitWeights, SmallPtrSet &ExitBlocks) { + BasicBlock *header = *Blocks.begin(); + Function *oldFunction = header->getParent(); + + // Calculate the entry frequency of the new function before we change the root + // block. 
+ if (BFI) { + assert(BPI && "Both BPI and BFI are required to preserve profile info"); + for (BasicBlock *Pred : predecessors(header)) { + if (Blocks.count(Pred)) + continue; + EntryFreq += + BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); + } + } + + // Calculate the exit blocks for the extracted region and the total exit + // weights for each of those blocks. + + for (BasicBlock *Block : Blocks) { + for (BasicBlock *Succ : successors(Block)) { + if (!Blocks.count(Succ)) { + // Update the branch weight for this successor. + if (BFI) { + BlockFrequency &BF = ExitWeights[Succ]; + BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); + } + ExitBlocks.insert(Succ); + } + } + } + NumExitBlocks = ExitBlocks.size(); +} + + + +void CodeExtractor::prepareForExtraction(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs) { + BasicBlock *header = *Blocks.begin(); + Function *oldFunction = header->getParent(); + + // If we have any return instructions in the region, split those blocks so + // that the return is not in the region. + splitReturnBlocks(); + + } Function * CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - ValueSet &inputs, ValueSet &outputs) { - if (!isEligible()) + ValueSet &inputs, ValueSet &outputs, bool KeepOldBlocks ) { + if (!isEligible()) return nullptr; - // Assumption: this is a single-entry code region, and the header is the first - // block in the region. - BasicBlock *header = *Blocks.begin(); - Function *oldFunction = header->getParent(); - - // Calculate the entry frequency of the new function before we change the root - // block. 
- BlockFrequency EntryFreq; - if (BFI) { - assert(BPI && "Both BPI and BFI are required to preserve profile info"); - for (BasicBlock *Pred : predecessors(header)) { - if (Blocks.count(Pred)) - continue; - EntryFreq += - BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); - } - } - // Remove @llvm.assume calls that will be moved to the new function from the - // old function's assumption cache. - for (BasicBlock *Block : Blocks) { - for (Instruction &I : llvm::make_early_inc_range(*Block)) { - if (auto *AI = dyn_cast(&I)) { - if (AC) - AC->unregisterAssumption(AI); - AI->eraseFromParent(); - } - } - } + // Assumption: this is a single-entry code region, and the header is the first + // block in the region. + BasicBlock *header = *Blocks.begin(); + Function *oldFunction = header->getParent(); - // If we have any return instructions in the region, split those blocks so - // that the return is not in the region. - splitReturnBlocks(); - // Calculate the exit blocks for the extracted region and the total exit - // weights for each of those blocks. - DenseMap ExitWeights; - SmallPtrSet ExitBlocks; - for (BasicBlock *Block : Blocks) { - for (BasicBlock *Succ : successors(Block)) { - if (!Blocks.count(Succ)) { - // Update the branch weight for this successor. - if (BFI) { - BlockFrequency &BF = ExitWeights[Succ]; - BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); + BlockFrequency EntryFreq; + DenseMap ExitWeights; + SmallPtrSet ExitBlocks; + + // analyzeBeforeExtraction(CEAC,inputs, outputs, EntryFreq,ExitWeights,ExitBlocks); + + // Calculate the entry frequency of the new function before we change the root + // block. 
+ if (BFI) { + assert(BPI && "Both BPI and BFI are required to preserve profile info"); + for (BasicBlock *Pred : predecessors(header)) { + if (Blocks.count(Pred)) + continue; + EntryFreq += + BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); } - ExitBlocks.insert(Succ); - } } - } - NumExitBlocks = ExitBlocks.size(); - for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock *OldTarget = TI->getSuccessor(i); - OldTargets.push_back(OldTarget); + + // canonicalization + prepareForExtraction(CEAC,inputs, outputs); + + + // analysis, after ret splitting + for (BasicBlock *Block : Blocks) { + for (BasicBlock *Succ : successors(Block)) { + if (!Blocks.count(Succ)) { + // Update the branch weight for this successor. + if (BFI) { + BlockFrequency &BF = ExitWeights[Succ]; + BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); + } + ExitBlocks.insert(Succ); + } + } } - } + NumExitBlocks = ExitBlocks.size(); + + // analyzis, after ret splitting + for (BasicBlock *Block : Blocks) { + Instruction *TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; + BasicBlock *OldTarget = TI->getSuccessor(i); + OldTargets.push_back(OldTarget); + } + } + + + // canonicalization + // If we have to split PHI nodes of the entry or exit blocks, do so now. + severSplitPHINodesOfEntry(header); + + // canonicalization, after ret splitting + severSplitPHINodesOfExits(ExitBlocks); + + + // analysis + ValueSet SinkingCands, HoistingCands; + BasicBlock *CommonExit = nullptr; + findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); + assert(HoistingCands.empty() || CommonExit); + + + + // analysis, after ret splitting (for values returned) + // Find inputs to, outputs from the code region. 
+ findInputsOutputs(inputs, outputs, SinkingCands); + + Function *newFunction = constructFunction2(inputs, outputs, header, oldFunction, oldFunction->getParent()); + + if (KeepOldBlocks) + return extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq,ExitWeights,ExitBlocks,SinkingCands,HoistingCands,CommonExit, newFunction ); + + + + + // Transforms/HotColdSplit/stale-assume-in-original-func.ll + // TODO: remove assumes only after moving + + // Remove @llvm.assume calls that will be moved to the new function from the + // old function's assumption cache. + for (BasicBlock* Block : Blocks) { + for (Instruction& I : llvm::make_early_inc_range(*Block)) { + if (auto* AI = dyn_cast(&I)) { + if (AC) + AC->unregisterAssumption(AI); + AI->eraseFromParent(); + } + } + } + + + + + - // If we have to split PHI nodes of the entry or exit blocks, do so now. - severSplitPHINodesOfEntry(header); - severSplitPHINodesOfExits(ExitBlocks); // This takes place of the original loop BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), - "codeRepl", oldFunction, - header); - - // The new function needs a root node because other nodes can branch to the - // head of the region, but the entry node of a function cannot have preds. - BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), - "newFuncRoot"); - auto *BranchI = BranchInst::Create(header); - // If the original function has debug info, we have to add a debug location - // to the new branch instruction from the artificial entry block. - // We use the debug location of the first instruction in the extracted - // blocks, as there is no other equivalent line in the source code. 
- if (oldFunction->getSubprogram()) { - any_of(Blocks, [&BranchI](const BasicBlock *BB) { - return any_of(*BB, [&BranchI](const Instruction &I) { - if (!I.getDebugLoc()) - return false; - BranchI->setDebugLoc(I.getDebugLoc()); - return true; - }); - }); - } - newFuncRoot->getInstList().push_back(BranchI); - - ValueSet SinkingCands, HoistingCands; - BasicBlock *CommonExit = nullptr; - findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); - assert(HoistingCands.empty() || CommonExit); - - // Find inputs to, outputs from the code region. - findInputsOutputs(inputs, outputs, SinkingCands); - - // Now sink all instructions which only have non-phi uses inside the region. - // Group the allocas at the start of the block, so that any bitcast uses of - // the allocas are well-defined. - AllocaInst *FirstSunkAlloca = nullptr; - for (auto *II : SinkingCands) { - if (auto *AI = dyn_cast(II)) { - AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); - if (!FirstSunkAlloca) - FirstSunkAlloca = AI; - } - } - assert((SinkingCands.empty() || FirstSunkAlloca) && - "Did not expect a sink candidate without any allocas"); - for (auto *II : SinkingCands) { - if (!isa(II)) { - cast(II)->moveAfter(FirstSunkAlloca); - } - } + "codeRepl", oldFunction, + header); + + + // The new function needs a root node because other nodes can branch to the + // head of the region, but the entry node of a function cannot have preds. + BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot"); + + + auto *BranchI = BranchInst::Create(header); + // If the original function has debug info, we have to add a debug location + // to the new branch instruction from the artificial entry block. + // We use the debug location of the first instruction in the extracted + // blocks, as there is no other equivalent line in the source code. 
+ if (oldFunction->getSubprogram()) { + any_of(Blocks, [&BranchI](const BasicBlock *BB) { + return any_of(*BB, [&BranchI](const Instruction &I) { + if (!I.getDebugLoc()) + return false; + BranchI->setDebugLoc(I.getDebugLoc()); + return true; + }); + }); + } + newFuncRoot->getInstList().push_back(BranchI); - if (!HoistingCands.empty()) { - auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); - Instruction *TI = HoistToBlock->getTerminator(); - for (auto *II : HoistingCands) - cast(II)->moveBefore(TI); - } - // Collect objects which are inputs to the extraction region and also - // referenced by lifetime start markers within it. The effects of these - // markers must be replicated in the calling function to prevent the stack - // coloring pass from merging slots which store input objects. - ValueSet LifetimesStart; - eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); - - // Construct new function based on inputs/outputs & add allocas for all defs. - Function *newFunction = - constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer, - oldFunction, oldFunction->getParent()); - - // Update the entry count of the function. - if (BFI) { - auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (Count.hasValue()) - newFunction->setEntryCount( - ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME - BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); - } - CallInst *TheCall = - emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); + // Now sink all instructions which only have non-phi uses inside the region. + // Group the allocas at the start of the block, so that any bitcast uses of + // the allocas are well-defined. 
+ AllocaInst *FirstSunkAlloca = nullptr; + for (auto *II : SinkingCands) { + if (auto *AI = dyn_cast(II)) { + AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); + if (!FirstSunkAlloca) + FirstSunkAlloca = AI; + } + } + assert((SinkingCands.empty() || FirstSunkAlloca) && + "Did not expect a sink candidate without any allocas"); + for (auto *II : SinkingCands) { + if (!isa(II)) { + cast(II)->moveAfter(FirstSunkAlloca); + } + } - moveCodeToFunction(newFunction); + if (!HoistingCands.empty()) { + auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + Instruction *TI = HoistToBlock->getTerminator(); + for (auto *II : HoistingCands) + cast(II)->moveBefore(TI); + } - // Replicate the effects of any lifetime start/end markers which referenced - // input objects in the extraction region by placing markers around the call. - insertLifetimeMarkersSurroundingCall( - oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall); + // Collect objects which are inputs to the extraction region and also + // referenced by lifetime start markers within it. The effects of these + // markers must be replicated in the calling function to prevent the stack + // coloring pass from merging slots which store input objects. + ValueSet LifetimesStart; + eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); + + // Construct new function based on inputs/outputs & add allocas for all defs. 
+ ValueToValueMapTy VMap; + +#if 0 + Function *newFunction = constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer, oldFunction, oldFunction->getParent(),false,VMap); +#else + auto newRootNode = newFuncRoot; + auto newHeader = codeReplacer ; + + if (newRootNode) { + newFunction->getBasicBlockList().push_back(newRootNode); + } else { + newRootNode = BasicBlock::Create(newFunction->getContext(), "newFuncRoot", newFunction); + // auto BranchI = BranchInst::Create(newRootNode, newRootNode); // FIXME + } - // Propagate personality info to the new function if there is one. - if (oldFunction->hasPersonalityFn()) - newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); - // Update the branch weights for the exit block. - if (BFI && NumExitBlocks > 1) - calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); + StructType *StructTy = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + //StructTy = StructType::get(M->getContext(), paramTy); + StructTy = cast( newFunction->getArg(0)->getType()); + } - // Loop over all of the PHI nodes in the header and exit blocks, and change - // any references to the old incoming edge to be the new incoming edge. - for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { - PHINode *PN = cast(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!Blocks.count(PN->getIncomingBlock(i))) - PN->setIncomingBlock(i, newFuncRoot); - } - for (BasicBlock *ExitBB : ExitBlocks) - for (PHINode &PN : ExitBB->phis()) { - Value *IncomingCodeReplacerVal = nullptr; - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - // Ignore incoming values from outside of the extracted region. - if (!Blocks.count(PN.getIncomingBlock(i))) - continue; + // Create an iterator to name all of the arguments we inserted. 
+ Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); + Instruction *TI = newFunction->begin()->getTerminator(); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, + "loadgep_" + inputs[i]->getName(), TI); + } else + RewriteVal = &*AI++; + + if (KeepOldBlocks) { + VMap[ inputs[i]] = RewriteVal ; + } else { + std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); + for (User* use : Users) + if (Instruction* inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } + } - // Ensure that there is only one incoming value from codeReplacer. - if (!IncomingCodeReplacerVal) { - PN.setIncomingBlock(i, codeReplacer); - IncomingCodeReplacerVal = PN.getIncomingValue(i); - } else - assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && - "PHI has two incompatbile incoming values from codeRepl"); + // Set names for input and output arguments. + if (!AggregateArgs) { + AI = newFunction->arg_begin(); + for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) + AI->setName(inputs[i]->getName()); + for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) + AI->setName(outputs[i]->getName()+".out"); + } + + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. 
+ std::vector Users(header->user_begin(), header->user_end()); + for (auto &U : Users) // FIXME: KeepOldBlocks? + // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (Instruction *I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction && + !Blocks.count(I->getParent())) + I->replaceUsesOfWith(header, newHeader); +#endif + + + // Propagate personality info to the new function if there is one. + if (oldFunction->hasPersonalityFn()) + newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); + + // Update the entry count of the function. + if (BFI) { + auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (Count.hasValue()) + newFunction->setEntryCount( + ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME + BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); } - } - fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall); - // Mark the new function `noreturn` if applicable. Terminators which resume - // exception propagation are treated as returning instructions. This is to - // avoid inserting traps after calls to outlined functions which unwind. - bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) { - const Instruction *Term = BB.getTerminator(); - return isa(Term) || isa(Term); - }); - if (doesNotReturn) - newFunction->setDoesNotReturn(); + + + + CallInst *TheCall = emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs, false,VMap); + + + moveCodeToFunction(newFunction); + + // Replicate the effects of any lifetime start/end markers which referenced + // input objects in the extraction region by placing markers around the call. + insertLifetimeMarkersSurroundingCall( + oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall); + + // Update the branch weights for the exit block. 
+ if (BFI && NumExitBlocks > 1) + calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); + + // Loop over all of the PHI nodes in the header and exit blocks, and change + // any references to the old incoming edge to be the new incoming edge. + for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { + PHINode* PN = cast(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!Blocks.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); + } + + for (BasicBlock* ExitBB : ExitBlocks) + for (PHINode& PN : ExitBB->phis()) { + Value* IncomingCodeReplacerVal = nullptr; + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // Ignore incoming values from outside of the extracted region. + if (!Blocks.count(PN.getIncomingBlock(i))) + continue; + + // Ensure that there is only one incoming value from codeReplacer. + if (!IncomingCodeReplacerVal) { + PN.setIncomingBlock(i, codeReplacer); + IncomingCodeReplacerVal = PN.getIncomingValue(i); + } + else + assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && + "PHI has two incompatbile incoming values from codeRepl"); + } + } + + fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall); + + + // Mark the new function `noreturn` if applicable. Terminators which resume + // exception propagation are treated as returning instructions. This is to + // avoid inserting traps after calls to outlined functions which unwind. 
+ bool doesNotReturn = none_of(*newFunction, [](const BasicBlock& BB) { + const Instruction* Term = BB.getTerminator(); + return isa(Term) || isa(Term); + }); + if (doesNotReturn) + newFunction->setDoesNotReturn(); + LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) { newFunction->dump(); @@ -1812,6 +2213,687 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, return newFunction; } + +Function *CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs,const BlockFrequency& EntryFreq, + const DenseMap &ExitWeights, + const SmallPtrSet &ExitBlocks, + const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit,Function *newFunction) { + // Assumption: this is a single-entry code region, and the header is the first block in the region. + BasicBlock *header = *Blocks.begin(); + Function *oldFunction = header->getParent(); + + + + + // This takes place of the original loop // TODO: move to after construction function + BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), + "codeRepl", oldFunction, + header); + + BasicBlock *newRootNode = nullptr; + auto newHeader = codeReplacer; + ValueToValueMapTy VMap; + Module *M = oldFunction->getParent(); + auto KeepOldBlocks = true; +#if 0 + newFunction = constructFunction(inputs, outputs, header, NewRoot, codeReplacer, oldFunction, oldFunction->getParent(), true,VMap); + /* + Function *CodeExtractor::constructFunction( + const ValueSet &inputs, + const ValueSet &outputs, + BasicBlock *header, + BasicBlock *&newRootNode, + BasicBlock *newHeader, + Function *oldFunction, + Module *M, bool KeepOldBlocks, ValueToValueMapTy &VMap) { + */ +#else +#if 0 + LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); + LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); + + // This function returns unsigned, outputs will go back by reference. 
+ switch (NumExitBlocks) { + case 0: + case 1: RetTy = Type::getVoidTy(header->getContext()); break; + case 2: RetTy = Type::getInt1Ty(header->getContext()); break; + default: RetTy = Type::getInt16Ty(header->getContext()); break; + } + + std::vector paramTy; + SmallVector VMapArg; + // Add the types of the input values to the function's argument list + for (Value *value : inputs) { + LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); + paramTy.push_back(value->getType()); VMapArg.push_back(value); + } + + // Add the types of the output values to the function's argument list. + for (Value *output : outputs) { + LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); + if (AggregateArgs) + paramTy.push_back(output->getType()); + else + paramTy.push_back(PointerType::getUnqual(output->getType())); + } + + LLVM_DEBUG({ + dbgs() << "Function type: " << *RetTy << " f("; + for (Type *i : paramTy) + dbgs() << *i << ", "; + dbgs() << ")\n"; + }); + + StructType *StructTy = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + StructTy = StructType::get(M->getContext(), paramTy); + paramTy.clear(); + paramTy.push_back(PointerType::getUnqual(StructTy)); + } + FunctionType *funcType = + FunctionType::get(RetTy, paramTy, + AllowVarArgs && oldFunction->isVarArg()); + + std::string SuffixToUse = + Suffix.empty() + ? (header->getName().empty() ? "extracted" : header->getName().str()) + : Suffix; + // Create the new function + Function *newFunction = Function::Create( + funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), + oldFunction->getName() + "." + SuffixToUse, M); + + // If the old function is no-throw, so is the new one. + if (oldFunction->doesNotThrow()) + newFunction->setDoesNotThrow(); + + // Inherit the uwtable attribute if we need to. + if (oldFunction->hasUWTable()) + newFunction->setHasUWTable(); + + // Inherit all of the target dependent attributes and white-listed + // target independent attributes. 
+ // (e.g. If the extracted region contains a call to an x86.sse + // instruction we need to make sure that the extracted region has the + // "target-features" attribute allowing it to be lowered. + // FIXME: This should be changed to check to see if a specific + // attribute can not be inherited. + for (const auto &Attr : oldFunction->getAttributes().getFnAttrs()) { + if (Attr.isStringAttribute()) { + if (Attr.getKindAsString() == "thunk") + continue; + } else + switch (Attr.getKindAsEnum()) { + // Those attributes cannot be propagated safely. Explicitly list them + // here so we get a warning if new attributes are added. This list also + // includes non-function attributes. + case Attribute::Alignment: + case Attribute::AllocSize: + case Attribute::ArgMemOnly: + case Attribute::Builtin: + case Attribute::ByVal: + case Attribute::Convergent: + case Attribute::Dereferenceable: + case Attribute::DereferenceableOrNull: + case Attribute::ElementType: + case Attribute::InAlloca: + case Attribute::InReg: + case Attribute::InaccessibleMemOnly: + case Attribute::InaccessibleMemOrArgMemOnly: + case Attribute::JumpTable: + case Attribute::Naked: + case Attribute::Nest: + case Attribute::NoAlias: + case Attribute::NoBuiltin: + case Attribute::NoCapture: + case Attribute::NoMerge: + case Attribute::NoReturn: + case Attribute::NoSync: + case Attribute::NoUndef: + case Attribute::None: + case Attribute::NonNull: + case Attribute::Preallocated: + case Attribute::ReadNone: + case Attribute::ReadOnly: + case Attribute::Returned: + case Attribute::ReturnsTwice: + case Attribute::SExt: + case Attribute::Speculatable: + case Attribute::StackAlignment: + case Attribute::StructRet: + case Attribute::SwiftError: + case Attribute::SwiftSelf: + case Attribute::SwiftAsync: + case Attribute::WillReturn: + case Attribute::WriteOnly: + case Attribute::ZExt: + case Attribute::ImmArg: + case Attribute::ByRef: + case Attribute::EndAttrKinds: + case Attribute::EmptyKey: + case 
Attribute::TombstoneKey: + continue; + // Those attributes should be safe to propagate to the extracted function. + case Attribute::AlwaysInline: + case Attribute::Cold: + case Attribute::DisableSanitizerInstrumentation: + case Attribute::Hot: + case Attribute::NoRecurse: + case Attribute::InlineHint: + case Attribute::MinSize: + case Attribute::NoCallback: + case Attribute::NoDuplicate: + case Attribute::NoFree: + case Attribute::NoImplicitFloat: + case Attribute::NoInline: + case Attribute::NonLazyBind: + case Attribute::NoRedZone: + case Attribute::NoUnwind: + case Attribute::NoSanitizeCoverage: + case Attribute::NullPointerIsValid: + case Attribute::OptForFuzzing: + case Attribute::OptimizeNone: + case Attribute::OptimizeForSize: + case Attribute::SafeStack: + case Attribute::ShadowCallStack: + case Attribute::SanitizeAddress: + case Attribute::SanitizeMemory: + case Attribute::SanitizeThread: + case Attribute::SanitizeHWAddress: + case Attribute::SanitizeMemTag: + case Attribute::SpeculativeLoadHardening: + case Attribute::StackProtect: + case Attribute::StackProtectReq: + case Attribute::StackProtectStrong: + case Attribute::StrictFP: + case Attribute::UWTable: + case Attribute::VScaleRange: + case Attribute::NoCfCheck: + case Attribute::MustProgress: + case Attribute::NoProfile: + break; + } + + newFunction->addFnAttr(Attr); + } +#endif + + if (newRootNode) { + newFunction->getBasicBlockList().push_back(newRootNode); + } else { + newRootNode = BasicBlock::Create(newFunction->getContext(), "newFuncRoot", newFunction); + // auto BranchI = BranchInst::Create(newRootNode, newRootNode); // FIXME + } + + + StructType *StructTy = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + //StructTy = StructType::get(M->getContext(), paramTy); + StructTy = cast( newFunction->getArg(0)->getType()); + } + + + + // Create an iterator to name all of the arguments we inserted. 
+ Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); + Instruction *TI = newFunction->begin()->getTerminator(); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, + "loadgep_" + inputs[i]->getName(), TI); + } else + RewriteVal = &*AI++; + + if (KeepOldBlocks) { + VMap[ inputs[i]] = RewriteVal ; + } else { + std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); + for (User* use : Users) + if (Instruction* inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } + } + + // Set names for input and output arguments. + if (!AggregateArgs) { + AI = newFunction->arg_begin(); + for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) + AI->setName(inputs[i]->getName()); + for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) + AI->setName(outputs[i]->getName()+".out"); + } + + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector Users(header->user_begin(), header->user_end()); + for (auto &U : Users) // FIXME: KeepOldBlocks? 
+ // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (Instruction *I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction && + !Blocks.count(I->getParent())) + I->replaceUsesOfWith(header, newHeader); + + //return newFunction; +#endif + BasicBlock *AllocaBlock = BasicBlock::Create(header->getContext(), "entry", newFunction, newRootNode); + BranchInst::Create(newRootNode, AllocaBlock); + + // Recursive calls to oldFunction still call the old Function from extracted function. + + VMap[oldFunction] = oldFunction; + + +#if 0 + CallInst *TheCall = emitCallAndSwitchStatement(newFunction, newRootNode, inputs, outputs,true, VMap); + /* + CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, + BasicBlock *codeReplacer, + ValueSet &inputs, + ValueSet &outputs, bool KeepOldBlocks, ValueToValueMapTy &VMap) { + */ +#else + // Emit a call to the new function, passing in: *pointer to struct (if + // aggregating parameters), or plain inputs and allocated memory for outputs + std::vector params, StructValues, ReloadOutputs, Reloads; + + // Module *M = newFunction->getParent(); + LLVMContext &Context = M->getContext(); + const DataLayout &DL = M->getDataLayout(); + CallInst *call = nullptr; + +#if 0 + BasicBlock *AllocaBlock ; + if (KeepOldBlocks) { + AllocaBlock = &newFunction->front(); + } else { + AllocaBlock = &codeReplacer->getParent()->front(); + } +#endif + +#if 0 + auto NewAlloca = [&](Type *Ty, unsigned AddrSpace, Value *ArraySize, + const Twine &Name) { + if (!KeepOldBlocks) + return new AllocaInst(Ty, AddrSpace,ArraySize, Name, &codeReplacer->getParent()->front().front()); + return new AllocaInst(Ty, AddrSpace, ArraySize, Name, &newFunction->front().front()); + }; +#endif + + // Add inputs as params, or to be filled into the struct + unsigned ArgNo = 0; + SmallVector SwiftErrorArgs; + for (Value *input : inputs) { + if (AggregateArgs) + StructValues.push_back(input); +
else { + params.push_back(input); + if (input->isSwiftError()) + SwiftErrorArgs.push_back(ArgNo); + } + ++ArgNo; + } + + // Create allocas for the outputs + for (Value *output : outputs) { + if (AggregateArgs) { + StructValues.push_back(output); + } else { + AllocaInst *alloca = + // NewAlloca(output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc"); +#if 1 + new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), + nullptr, output->getName() + ".loc", + &AllocaBlock->front()); +#endif + ReloadOutputs.push_back(alloca); + params.push_back(alloca); + } + } + + StructType *StructArgTy = nullptr; + AllocaInst *Struct = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + std::vector ArgTypes; + for (Value *V : StructValues) + ArgTypes.push_back(V->getType()); + + // Allocate a struct at the beginning of this function + StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); + // Struct = NewAlloca(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg"); +#if 1 + Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", + &AllocaBlock->front()); +#endif + params.push_back(Struct); + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + new StoreInst(StructValues[i], GEP, codeReplacer); + } + } + + // Emit the call to the function + call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); + + + + // Add debug location to the new call, if the original function has debug + // info. In that case, the terminator of the entry block of the extracted + // function contains the first debug location of the extracted function, + // set in extractCodeRegion. 
+ if (codeReplacer->getParent()->getSubprogram()) { + if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) + call->setDebugLoc(DL); + } + codeReplacer->getInstList().push_back(call); + + // Set swifterror parameter attributes. + for (unsigned SwiftErrArgNo : SwiftErrorArgs) { + call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); + newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); + } + + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); + + // Reload the outputs passed in by reference. + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *Output = nullptr; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } else { + Output = ReloadOutputs[i]; + } + LoadInst *load = new LoadInst(outputs[i]->getType(), Output, + outputs[i]->getName() + ".reload", + codeReplacer); + Reloads.push_back(load); + std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction *inst = cast(Users[u]); + if (!Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); + } + } + + // Now we can emit a switch statement using the call as a value. + SwitchInst *TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + + // Since there may be multiple exits from the original region, make the new + // function return an unsigned, switch on that number. 
This loop iterates + // over all of the blocks in the extracted region, updating any terminator + // instructions in the to-be-extracted region that branch to blocks that are + // not in the region to be extracted. + std::map ExitBlockMap; + + // Iterate over the previously collected targets, and create new blocks inside + // the function to branch to. + unsigned switchVal = 0; + for (BasicBlock *OldTarget : OldTargets) { + if (Blocks.count(OldTarget)) + continue; + BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; + if (NewTarget) + continue; + + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", + newFunction); + VMap[OldTarget] = NewTarget; + unsigned SuccNum = switchVal++; + + Value *brVal = nullptr; + assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); + switch (NumExitBlocks) { + case 0: + case 1: break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } + + ReturnInst::Create(Context, brVal, NewTarget); + + // Update the switch instruction. 
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), + OldTarget); + } + + //if (!KeepOldBlocks) +#if 1 + for (BasicBlock *Block : Blocks) { + Instruction *TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; + BasicBlock *OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock *NewTarget = ExitBlockMap[OldTarget]; + assert(NewTarget && "Unknown target block!"); + + // rewrite the original branch instruction with this new target + // TI->setSuccessor(i, NewTarget); + VMap[OldTarget] = NewTarget; + } + } +#endif + + // Store the arguments right after the definition of output value. + // This must be done after creating the exit stubs to ensure that the restore + // of an invoke result is placed in the outlined function. + Function::arg_iterator OAI = OutputArgBegin; + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto *OutI = dyn_cast(outputs[i]); + if (!OutI) + continue; + + // Find proper insertion point. + BasicBlock::iterator InsertPt; + // In case OutI is an invoke, we insert the store at the beginning in the + // 'normal destination' BB. Otherwise we insert the store right after OutI.
+ if (auto *InvokeI = dyn_cast(OutI)) + InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); + else if (auto *Phi = dyn_cast(OutI)) + InsertPt = Phi->getParent()->getFirstInsertionPt(); + else + InsertPt = std::next(OutI->getIterator()); + + Instruction *InsertBefore = &*InsertPt; + assert((InsertBefore->getFunction() == newFunction || + Blocks.count(InsertBefore->getParent())) && + "InsertPt should be in new function"); + assert(OAI != newFunction->arg_end() && + "Number of output arguments should match " + "the amount of defined values"); + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), + InsertBefore); + new StoreInst(outputs[i], GEP, InsertBefore); + // Since there should be only one struct argument aggregating + // all the output values, we shouldn't increment OAI, which always + // points to the struct argument, in this case. + } else { + new StoreInst(outputs[i], &*OAI, InsertBefore); + ++OAI; + } + } + + // Now that we've done the deed, simplify the switch instruction. + Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy->isVoidTy()) { + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + } else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. 
+ ReturnInst::Create(Context, + Constant::getNullValue(OldFnRetTy), TheSwitch); + } + + TheSwitch->eraseFromParent(); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->eraseFromParent(); + break; + case 2: + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->eraseFromParent(); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. + TheSwitch->setCondition(call); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); + break; + } + + // Insert lifetime markers around the reloads of any output values. The + // allocas output values are stored in are only in-use in the codeRepl block. + insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); + + auto TheCall =call; +#endif + + // Function *oldFunc =oldFunction; + Function::BasicBlockListType &oldBlocks = oldFunction->getBasicBlockList(); + Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); + +#if 0 + DebugInfoFinder DIFinder; + assert((newFunction->getParent() == nullptr || + newFunction->getParent() == oldFunc->getParent()) && + "Expected NewFunc to have the same parent, or no parent"); + if (DISubprogram * SPClonedWithinModule = oldFunc->getSubprogram()) + DIFinder.processSubprogram(SPClonedWithinModule); +#endif + + + auto newFuncIt = newFunction->front().getIterator(); + for (BasicBlock *Block : Blocks) { + BasicBlock* CBB = CloneBasicBlock(Block, VMap, {}, newFunction /*, nullptr, &DIFinder*/); + + // Add basic block mapping. + VMap[Block] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. 
Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (Block->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(oldFunction,Block); + VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); + } + + // Note return instructions for the caller. + // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) + // Returns.push_back(RI); + } + + + + for (auto Pred : predecessors(header)) { + VMap[Pred] = newRootNode; + } + + + // Loop over all of the instructions in the new function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (BasicBlock* Block : Blocks) { + auto NewBlock = VMap.lookup(Block); + if (!NewBlock) { + continue; + } + // BasicBlock *Y =NewBlock; + + // Loop over all instructions, fixing each one as we find it... + for (Instruction& II : cast (*NewBlock)) + RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); + } + + int b = 0; + + auto HeaderCopy = VMap.lookup(header); + assert(HeaderCopy); + auto *BranchI = BranchInst::Create(header, newRootNode); + + // Mark the new function `noreturn` if applicable. Terminators which resume + // exception propagation are treated as returning instructions. This is to + // avoid inserting traps after calls to outlined functions which unwind. 
+ bool doesNotReturn = none_of(*newFunction, [](const BasicBlock& BB) { + const Instruction* Term = BB.getTerminator(); + if (!Term) return false; // for "newFuncRoot" + return isa(Term) || isa(Term); + }); + if (doesNotReturn) + newFunction->setDoesNotReturn(); + + + LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) { + newFunction->dump(); + report_fatal_error("verification of newFunction failed!"); + }); + LLVM_DEBUG(if (verifyFunction(*oldFunction)) + report_fatal_error("verification of oldFunction failed!")); + LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC)) + report_fatal_error("Stale Asumption cache for old Function!")); + return newFunction; +} + bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc, const Function &NewFunc, AssumptionCache *AC) { diff --git a/llvm/test/Transforms/IROutliner/outlining-calls.ll b/llvm/test/Transforms/IROutliner/outlining-calls.ll index 2372c4f674964..4ed7ea69a02d6 100644 --- a/llvm/test/Transforms/IROutliner/outlining-calls.ll +++ b/llvm/test/Transforms/IROutliner/outlining-calls.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -iroutline-copy < %s | FileCheck %s ; This test checks that we do can outline calls, but only if they have the same ; function type and the same name. 
From 5efb70cf525088929d09986247881026d422bcf3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 25 Nov 2021 14:55:29 -0600 Subject: [PATCH 002/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 29 ++++++++++++++++----- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 411e2a38206c2..cbcb30df87ce3 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1877,6 +1877,20 @@ void CodeExtractor::prepareForExtraction(const CodeExtractorAnalysisCache &CEAC, } + +static void applyFirstDebugLoc(Function *oldFunction, ArrayRef Blocks, Instruction *BranchI) { + if (oldFunction->getSubprogram()) { + any_of(Blocks, [&BranchI](const BasicBlock *BB) { + return any_of(*BB, [&BranchI](const Instruction &I) { + if (!I.getDebugLoc()) + return false; + BranchI->setDebugLoc(I.getDebugLoc()); + return true; + }); + }); + } +} + Function * CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs, bool KeepOldBlocks ) { @@ -1985,10 +1999,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - - - - // This takes place of the original loop BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, @@ -2001,6 +2011,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, auto *BranchI = BranchInst::Create(header); + +#if 0 // If the original function has debug info, we have to add a debug location // to the new branch instruction from the artificial entry block. 
// We use the debug location of the first instruction in the extracted @@ -2015,6 +2027,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, }); }); } +#endif + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); + newFuncRoot->getInstList().push_back(BranchI); @@ -2488,7 +2503,8 @@ Function *CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCach //return newFunction; #endif BasicBlock *AllocaBlock = BasicBlock::Create(header->getContext(), "entry", newFunction, newRootNode); - BranchInst::Create(newRootNode, AllocaBlock); + auto BranchI = BranchInst::Create(newRootNode, AllocaBlock); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); // Recursive calls to oldFunction still call the old Function from extracted function. @@ -2869,7 +2885,8 @@ Function *CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCach auto HeaderCopy = VMap.lookup(header); assert(HeaderCopy); - auto *BranchI = BranchInst::Create(header, newRootNode); + auto *BranchI2 = BranchInst::Create(header, newRootNode); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); // Mark the new function `noreturn` if applicable. Terminators which resume // exception propagation are treated as returning instructions. 
This is to From bc9e2c846641a7d416bb24bd6026fb4c3295e503 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 25 Nov 2021 15:32:44 -0600 Subject: [PATCH 003/130] WIP --- .../llvm/Transforms/Utils/CodeExtractor.h | 11 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 253 +++--------------- 2 files changed, 47 insertions(+), 217 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index d3987f94cec54..7e49a397d98b1 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -169,7 +169,10 @@ class CodeExtractorAnalysisCache { ValueSet &Inputs, ValueSet &Outputs, const BlockFrequency& EntryFreq, const DenseMap &ExitWeights, const SmallPtrSet &ExitBlocks, - const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit, Function *newFunction ); + const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit, Function *newFunction, + BasicBlock * codeReplacer, + BasicBlock * NewEntry, + BasicBlock * newRootNode ); /// Verify that assumption cache isn't stale after a region is extracted. /// Returns true when verifier finds errors. 
AssumptionCache is passed as @@ -250,6 +253,12 @@ class CodeExtractorAnalysisCache { BasicBlock *&newRootNode, BasicBlock *newHeader, Function *oldFunction, Module *M, bool KeepOldBlocks, ValueToValueMapTy &VMap); + void handleParams( + Function *oldFunction, Function *newFunction, + const ValueSet &inputs, + const ValueSet &outputs) ; + + Function *constructFunction2(const ValueSet &inputs, const ValueSet &outputs, BasicBlock *header, diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index cbcb30df87ce3..30bdb1427cb91 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1240,6 +1240,14 @@ Function *CodeExtractor::constructFunction2(const ValueSet &inputs, } + +void CodeExtractor::handleParams( + Function *oldFunction, Function *newFunction, + const ValueSet &inputs, + const ValueSet &outputs) { +} + + /// Erase lifetime.start markers which reference inputs to the extraction /// region, and insert the referenced memory into \p LifetimesStart. /// @@ -1976,15 +1984,30 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Function *newFunction = constructFunction2(inputs, outputs, header, oldFunction, oldFunction->getParent()); + + // The new function needs a root node because other nodes can branch to the + // head of the region, but the entry node of a function cannot have preds. 
+ BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); + BasicBlock * newRootNode=newFuncRoot; + + + + // This takes place of the original loop + BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), + "codeRepl", oldFunction, + header); + + if (KeepOldBlocks) - return extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq,ExitWeights,ExitBlocks,SinkingCands,HoistingCands,CommonExit, newFunction ); + return extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq,ExitWeights,ExitBlocks,SinkingCands,HoistingCands,CommonExit, newFunction, codeReplacer,nullptr,newRootNode ); + + // Transforms/HotColdSplit/stale-assume-in-original-func.ll // TODO: remove assumes only after moving - // Remove @llvm.assume calls that will be moved to the new function from the // old function's assumption cache. for (BasicBlock* Block : Blocks) { @@ -1999,18 +2022,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - // This takes place of the original loop - BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), - "codeRepl", oldFunction, - header); - // The new function needs a root node because other nodes can branch to the - // head of the region, but the entry node of a function cannot have preds. 
- BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot"); - - - auto *BranchI = BranchInst::Create(header); + + auto *BranchI = BranchInst::Create(header, newFuncRoot); #if 0 // If the original function has debug info, we have to add a debug location @@ -2028,9 +2043,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, }); } #endif - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); - - newFuncRoot->getInstList().push_back(BranchI); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); + // newFuncRoot->getInstList().push_back(BranchI); @@ -2073,16 +2087,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, #if 0 Function *newFunction = constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer, oldFunction, oldFunction->getParent(),false,VMap); #else - auto newRootNode = newFuncRoot; + // auto newRootNode = newFuncRoot; auto newHeader = codeReplacer ; - if (newRootNode) { - newFunction->getBasicBlockList().push_back(newRootNode); - } else { - newRootNode = BasicBlock::Create(newFunction->getContext(), "newFuncRoot", newFunction); - // auto BranchI = BranchInst::Create(newRootNode, newRootNode); // FIXME - } - StructType *StructTy = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { @@ -2232,7 +2239,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Function *CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs,const BlockFrequency& EntryFreq, const DenseMap &ExitWeights, const SmallPtrSet &ExitBlocks, - const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit,Function *newFunction) { + const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit,Function *newFunction, + BasicBlock * codeReplacer, + BasicBlock * NewEntry, BasicBlock * newRootNode ) { // Assumption: this is a single-entry code region, 
and the header is the first block in the region. BasicBlock *header = *Blocks.begin(); Function *oldFunction = header->getParent(); @@ -2240,207 +2249,19 @@ Function *CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCach - // This takes place of the original loop // TODO: move to after construction function - BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), - "codeRepl", oldFunction, - header); - BasicBlock *newRootNode = nullptr; + auto newHeader = codeReplacer; ValueToValueMapTy VMap; Module *M = oldFunction->getParent(); auto KeepOldBlocks = true; -#if 0 - newFunction = constructFunction(inputs, outputs, header, NewRoot, codeReplacer, oldFunction, oldFunction->getParent(), true,VMap); - /* - Function *CodeExtractor::constructFunction( - const ValueSet &inputs, - const ValueSet &outputs, - BasicBlock *header, - BasicBlock *&newRootNode, - BasicBlock *newHeader, - Function *oldFunction, - Module *M, bool KeepOldBlocks, ValueToValueMapTy &VMap) { - */ -#else -#if 0 - LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); - LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); - - // This function returns unsigned, outputs will go back by reference. - switch (NumExitBlocks) { - case 0: - case 1: RetTy = Type::getVoidTy(header->getContext()); break; - case 2: RetTy = Type::getInt1Ty(header->getContext()); break; - default: RetTy = Type::getInt16Ty(header->getContext()); break; - } - std::vector paramTy; - SmallVector VMapArg; - // Add the types of the input values to the function's argument list - for (Value *value : inputs) { - LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); - paramTy.push_back(value->getType()); VMapArg.push_back(value); - } - // Add the types of the output values to the function's argument list. 
- for (Value *output : outputs) { - LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); - if (AggregateArgs) - paramTy.push_back(output->getType()); - else - paramTy.push_back(PointerType::getUnqual(output->getType())); - } - LLVM_DEBUG({ - dbgs() << "Function type: " << *RetTy << " f("; - for (Type *i : paramTy) - dbgs() << *i << ", "; - dbgs() << ")\n"; - }); - - StructType *StructTy = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - StructTy = StructType::get(M->getContext(), paramTy); - paramTy.clear(); - paramTy.push_back(PointerType::getUnqual(StructTy)); - } - FunctionType *funcType = - FunctionType::get(RetTy, paramTy, - AllowVarArgs && oldFunction->isVarArg()); - - std::string SuffixToUse = - Suffix.empty() - ? (header->getName().empty() ? "extracted" : header->getName().str()) - : Suffix; - // Create the new function - Function *newFunction = Function::Create( - funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), - oldFunction->getName() + "." + SuffixToUse, M); - - // If the old function is no-throw, so is the new one. - if (oldFunction->doesNotThrow()) - newFunction->setDoesNotThrow(); - - // Inherit the uwtable attribute if we need to. - if (oldFunction->hasUWTable()) - newFunction->setHasUWTable(); - - // Inherit all of the target dependent attributes and white-listed - // target independent attributes. - // (e.g. If the extracted region contains a call to an x86.sse - // instruction we need to make sure that the extracted region has the - // "target-features" attribute allowing it to be lowered. - // FIXME: This should be changed to check to see if a specific - // attribute can not be inherited. - for (const auto &Attr : oldFunction->getAttributes().getFnAttrs()) { - if (Attr.isStringAttribute()) { - if (Attr.getKindAsString() == "thunk") - continue; - } else - switch (Attr.getKindAsEnum()) { - // Those attributes cannot be propagated safely. 
Explicitly list them - // here so we get a warning if new attributes are added. This list also - // includes non-function attributes. - case Attribute::Alignment: - case Attribute::AllocSize: - case Attribute::ArgMemOnly: - case Attribute::Builtin: - case Attribute::ByVal: - case Attribute::Convergent: - case Attribute::Dereferenceable: - case Attribute::DereferenceableOrNull: - case Attribute::ElementType: - case Attribute::InAlloca: - case Attribute::InReg: - case Attribute::InaccessibleMemOnly: - case Attribute::InaccessibleMemOrArgMemOnly: - case Attribute::JumpTable: - case Attribute::Naked: - case Attribute::Nest: - case Attribute::NoAlias: - case Attribute::NoBuiltin: - case Attribute::NoCapture: - case Attribute::NoMerge: - case Attribute::NoReturn: - case Attribute::NoSync: - case Attribute::NoUndef: - case Attribute::None: - case Attribute::NonNull: - case Attribute::Preallocated: - case Attribute::ReadNone: - case Attribute::ReadOnly: - case Attribute::Returned: - case Attribute::ReturnsTwice: - case Attribute::SExt: - case Attribute::Speculatable: - case Attribute::StackAlignment: - case Attribute::StructRet: - case Attribute::SwiftError: - case Attribute::SwiftSelf: - case Attribute::SwiftAsync: - case Attribute::WillReturn: - case Attribute::WriteOnly: - case Attribute::ZExt: - case Attribute::ImmArg: - case Attribute::ByRef: - case Attribute::EndAttrKinds: - case Attribute::EmptyKey: - case Attribute::TombstoneKey: - continue; - // Those attributes should be safe to propagate to the extracted function. 
- case Attribute::AlwaysInline: - case Attribute::Cold: - case Attribute::DisableSanitizerInstrumentation: - case Attribute::Hot: - case Attribute::NoRecurse: - case Attribute::InlineHint: - case Attribute::MinSize: - case Attribute::NoCallback: - case Attribute::NoDuplicate: - case Attribute::NoFree: - case Attribute::NoImplicitFloat: - case Attribute::NoInline: - case Attribute::NonLazyBind: - case Attribute::NoRedZone: - case Attribute::NoUnwind: - case Attribute::NoSanitizeCoverage: - case Attribute::NullPointerIsValid: - case Attribute::OptForFuzzing: - case Attribute::OptimizeNone: - case Attribute::OptimizeForSize: - case Attribute::SafeStack: - case Attribute::ShadowCallStack: - case Attribute::SanitizeAddress: - case Attribute::SanitizeMemory: - case Attribute::SanitizeThread: - case Attribute::SanitizeHWAddress: - case Attribute::SanitizeMemTag: - case Attribute::SpeculativeLoadHardening: - case Attribute::StackProtect: - case Attribute::StackProtectReq: - case Attribute::StackProtectStrong: - case Attribute::StrictFP: - case Attribute::UWTable: - case Attribute::VScaleRange: - case Attribute::NoCfCheck: - case Attribute::MustProgress: - case Attribute::NoProfile: - break; - } - - newFunction->addFnAttr(Attr); - } -#endif - - if (newRootNode) { - newFunction->getBasicBlockList().push_back(newRootNode); - } else { - newRootNode = BasicBlock::Create(newFunction->getContext(), "newFuncRoot", newFunction); - // auto BranchI = BranchInst::Create(newRootNode, newRootNode); // FIXME - } + + // TODO: Make StructTy a field StructType *StructTy = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { //StructTy = StructType::get(M->getContext(), paramTy); @@ -2501,7 +2322,7 @@ Function *CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCach I->replaceUsesOfWith(header, newHeader); //return newFunction; -#endif + BasicBlock *AllocaBlock = BasicBlock::Create(header->getContext(), "entry", newFunction, newRootNode); auto BranchI = 
BranchInst::Create(newRootNode, AllocaBlock); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); From 6dbe2b6f994c87cc92ac81a90b1e79e33685d9e6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 25 Nov 2021 16:02:22 -0600 Subject: [PATCH 004/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 117 +++++++++----------- 1 file changed, 53 insertions(+), 64 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 30bdb1427cb91..c43e4e02b9380 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1982,6 +1982,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Find inputs to, outputs from the code region. findInputsOutputs(inputs, outputs, SinkingCands); + + // Construct new function based on inputs/outputs & add allocas for all defs. Function *newFunction = constructFunction2(inputs, outputs, header, oldFunction, oldFunction->getParent()); @@ -1993,9 +1995,49 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // This takes place of the original loop - BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), - "codeRepl", oldFunction, - header); + BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); + auto newHeader = codeReplacer; + + ValueToValueMapTy VMap; + + StructType *StructTy = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + //StructTy = StructType::get(M->getContext(), paramTy); + StructTy = cast(newFunction->getArg(0)->getType()); + } + + + + // Create an iterator to name all of the arguments we inserted. + Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. 
+ SmallVector NewValues; + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); + Instruction *TI = newFunction->begin()->getTerminator(); + GetElementPtrInst *GEP = GetElementPtrInst::Create(StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, "loadgep_" + inputs[i]->getName(), TI); + } else + RewriteVal = &*AI++; + + NewValues.push_back(RewriteVal); + } + + // Set names for input and output arguments. + if (!AggregateArgs) { + AI = newFunction->arg_begin(); + for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) + AI->setName(inputs[i]->getName()); + for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) + AI->setName(outputs[i]->getName()+".out"); + } if (KeepOldBlocks) @@ -2023,26 +2065,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - - auto *BranchI = BranchInst::Create(header, newFuncRoot); - -#if 0 - // If the original function has debug info, we have to add a debug location - // to the new branch instruction from the artificial entry block. - // We use the debug location of the first instruction in the extracted - // blocks, as there is no other equivalent line in the source code. 
- if (oldFunction->getSubprogram()) { - any_of(Blocks, [&BranchI](const BasicBlock *BB) { - return any_of(*BB, [&BranchI](const Instruction &I) { - if (!I.getDebugLoc()) - return false; - BranchI->setDebugLoc(I.getDebugLoc()); - return true; - }); - }); - } -#endif applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); // newFuncRoot->getInstList().push_back(BranchI); @@ -2067,6 +2090,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } + if (!HoistingCands.empty()) { auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); Instruction *TI = HoistToBlock->getTerminator(); @@ -2081,45 +2105,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ValueSet LifetimesStart; eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); - // Construct new function based on inputs/outputs & add allocas for all defs. - ValueToValueMapTy VMap; - -#if 0 - Function *newFunction = constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer, oldFunction, oldFunction->getParent(),false,VMap); -#else - // auto newRootNode = newFuncRoot; - auto newHeader = codeReplacer ; - - StructType *StructTy = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - //StructTy = StructType::get(M->getContext(), paramTy); - StructTy = cast( newFunction->getArg(0)->getType()); - } + + - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); - // Rewrite all users of the inputs in the extracted region to use the - // arguments (or appropriate addressing into struct) instead. 
for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); - Instruction *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, - "loadgep_" + inputs[i]->getName(), TI); - } else - RewriteVal = &*AI++; + Value *RewriteVal = NewValues[i]; + if (KeepOldBlocks) { - VMap[ inputs[i]] = RewriteVal ; - } else { + VMap[inputs[i]] = RewriteVal; + } else { std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); for (User* use : Users) if (Instruction* inst = dyn_cast(use)) @@ -2128,14 +2126,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } - // Set names for input and output arguments. - if (!AggregateArgs) { - AI = newFunction->arg_begin(); - for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) - AI->setName(inputs[i]->getName()); - for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) - AI->setName(outputs[i]->getName()+".out"); - } // Rewrite branches to basic blocks outside of the loop to new dummy blocks // within the new function. This must be done before we lose track of which @@ -2148,7 +2138,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (I->isTerminator() && I->getFunction() == oldFunction && !Blocks.count(I->getParent())) I->replaceUsesOfWith(header, newHeader); -#endif + // Propagate personality info to the new function if there is one. 
@@ -2167,7 +2157,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - CallInst *TheCall = emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs, false,VMap); From 032d0fce886c45ae56b5d9ad6e08a1d578b0144a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 29 Nov 2021 12:09:48 -0600 Subject: [PATCH 005/130] WIP --- .../llvm/Transforms/Utils/CodeExtractor.h | 2 + llvm/lib/Transforms/Utils/CodeExtractor.cpp | 133 +++++++++--------- 2 files changed, 72 insertions(+), 63 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 7e49a397d98b1..1d97a20da91da 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -274,6 +274,8 @@ class CodeExtractorAnalysisCache { DenseMap &ExitWeights, BranchProbabilityInfo *BPI); + + CallInst *emitCallAndSwitchStatement(Function *newFunction, BasicBlock *newHeader, ValueSet &inputs, ValueSet &outputs,bool KeepOldBlocks , ValueToValueMapTy &VMap); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index c43e4e02b9380..14d64033925d5 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1128,6 +1128,10 @@ Function *CodeExtractor::constructFunction2(const ValueSet &inputs, if (oldFunction->hasUWTable()) newFunction->setHasUWTable(); + // Propagate personality info to the new function if there is one. + if (oldFunction->hasPersonalityFn()) + newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); + // Inherit all of the target dependent attributes and white-listed // target independent attributes. // (e.g. 
If the extracted region contains a call to an x86.sse @@ -1329,22 +1333,30 @@ static void insertLifetimeMarkersSurroundingCall( } } + + + + + + /// emitCallAndSwitchStatement - This method sets up the caller side by adding /// the call instruction, splitting any PHI nodes in the header block as /// necessary. CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, ValueSet &inputs, - ValueSet &outputs, bool KeepOldBlocks, ValueToValueMapTy &VMap) { + ValueSet &outputs, bool KeepOldBlocks, ValueToValueMapTy &VMap) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs - std::vector params, StructValues, ReloadOutputs, Reloads; + std::vector ReloadOutputs, Reloads; Module *M = newFunction->getParent(); LLVMContext &Context = M->getContext(); const DataLayout &DL = M->getDataLayout(); CallInst *call = nullptr; + + // TOOD: Pass AllocaBlock BasicBlock *AllocaBlock ; if (KeepOldBlocks) { AllocaBlock = &newFunction->front(); @@ -1352,28 +1364,23 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, AllocaBlock = &codeReplacer->getParent()->front(); } -#if 0 - auto NewAlloca = [&](Type *Ty, unsigned AddrSpace, Value *ArraySize, - const Twine &Name) { - if (!KeepOldBlocks) - return new AllocaInst(Ty, AddrSpace,ArraySize, Name, &codeReplacer->getParent()->front().front()); - return new AllocaInst(Ty, AddrSpace, ArraySize, Name, &newFunction->front().front()); - }; -#endif // Add inputs as params, or to be filled into the struct unsigned ArgNo = 0; + std::vector params; + std::vector StructValues; SmallVector SwiftErrorArgs; for (Value *input : inputs) { - if (AggregateArgs) - StructValues.push_back(input); - else { - params.push_back(input); - if (input->isSwiftError()) - SwiftErrorArgs.push_back(ArgNo); - } - ++ArgNo; + if (AggregateArgs) + StructValues.push_back(input); + else { + params.push_back(input); + if 
(input->isSwiftError()) + SwiftErrorArgs.push_back(ArgNo); + } + ++ArgNo; } + // Create allocas for the outputs for (Value *output : outputs) { @@ -1392,6 +1399,7 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, } } + StructType *StructArgTy = nullptr; AllocaInst *Struct = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { @@ -1413,13 +1421,13 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + GetElementPtrInst *GEP = GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); codeReplacer->getInstList().push_back(GEP); new StoreInst(StructValues[i], GEP, codeReplacer); } } + // Emit the call to the function call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); @@ -1436,7 +1444,7 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, codeReplacer->getInstList().push_back(call); // Set swifterror parameter attributes. 
- for (unsigned SwiftErrArgNo : SwiftErrorArgs) { + for (unsigned SwiftErrArgNo : SwiftErrorArgs) { // TOOD: Move to constructFunction call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); } @@ -1453,8 +1461,7 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + GetElementPtrInst *GEP = GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; } else { @@ -1467,8 +1474,10 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { Instruction *inst = cast(Users[u]); - if (!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); + if (!KeepOldBlocks) { + if (!Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); + } } } @@ -1523,21 +1532,28 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, OldTarget); } - //if (!KeepOldBlocks) - for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock *OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock *NewTarget = ExitBlockMap[OldTarget]; - assert(NewTarget && "Unknown target block!"); - - // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); - } - } + + + for (BasicBlock* Block : Blocks) { + Instruction* TI = 
Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; + BasicBlock* OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock* NewTarget = ExitBlockMap[OldTarget]; + assert(NewTarget && "Unknown target block!"); + + if (!KeepOldBlocks) { + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } else { + VMap[OldTarget] = NewTarget; + } + } + } + + // Store the arguments right after the definition of output value. // This should be proceeded after creating exit stubs to be ensure that invoke @@ -2039,11 +2055,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, AI->setName(outputs[i]->getName()+".out"); } + // Update the entry count of the function. + if (BFI) { + auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (Count.hasValue()) + newFunction->setEntryCount( + ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME + BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); + } - if (KeepOldBlocks) - return extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq,ExitWeights,ExitBlocks,SinkingCands,HoistingCands,CommonExit, newFunction, codeReplacer,nullptr,newRootNode ); + if (KeepOldBlocks) + return extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq,ExitWeights,ExitBlocks,SinkingCands,HoistingCands,CommonExit, newFunction, codeReplacer,nullptr,newRootNode ); @@ -2082,8 +2106,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, FirstSunkAlloca = AI; } } - assert((SinkingCands.empty() || FirstSunkAlloca) && - "Did not expect a sink candidate without any allocas"); + assert((SinkingCands.empty() || FirstSunkAlloca) && "Did not expect a sink candidate without any allocas"); for (auto *II : SinkingCands) { if (!isa(II)) { cast(II)->moveAfter(FirstSunkAlloca); @@ -2114,19 +2137,16 @@ 
CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, for (unsigned i = 0, e = inputs.size(); i != e; ++i) { Value *RewriteVal = NewValues[i]; - - if (KeepOldBlocks) { - VMap[inputs[i]] = RewriteVal; - } else { std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); for (User* use : Users) if (Instruction* inst = dyn_cast(use)) if (Blocks.count(inst->getParent())) inst->replaceUsesOfWith(inputs[i], RewriteVal); - } } + + // Rewrite branches to basic blocks outside of the loop to new dummy blocks // within the new function. This must be done before we lose track of which // blocks were originally in the code region. @@ -2141,24 +2161,11 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - // Propagate personality info to the new function if there is one. - if (oldFunction->hasPersonalityFn()) - newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); - - // Update the entry count of the function. - if (BFI) { - auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (Count.hasValue()) - newFunction->setEntryCount( - ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME - BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); - } - CallInst *TheCall = emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs, false,VMap); - + moveCodeToFunction(newFunction); From e61d2180bdbeb68531e0ea7513d463977c121896 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 29 Nov 2021 23:25:00 -0600 Subject: [PATCH 006/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 226 +++++++++----------- 1 file changed, 102 insertions(+), 124 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 14d64033925d5..7fd180ac7d346 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -2066,16 +2066,13 @@ CodeExtractor::extractCodeRegion(const 
CodeExtractorAnalysisCache &CEAC, - if (KeepOldBlocks) - return extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq,ExitWeights,ExitBlocks,SinkingCands,HoistingCands,CommonExit, newFunction, codeReplacer,nullptr,newRootNode ); - - - - - // Transforms/HotColdSplit/stale-assume-in-original-func.ll - // TODO: remove assumes only after moving - // Remove @llvm.assume calls that will be moved to the new function from the - // old function's assumption cache. + if (KeepOldBlocks) { + extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq, ExitWeights, ExitBlocks, SinkingCands, HoistingCands, CommonExit, newFunction, codeReplacer, nullptr, newRootNode); + } else { + // Transforms/HotColdSplit/stale-assume-in-original-func.ll + // TODO: remove assumes only after moving + // Remove @llvm.assume calls that will be moved to the new function from the + // old function's assumption cache. for (BasicBlock* Block : Blocks) { for (Instruction& I : llvm::make_early_inc_range(*Block)) { if (auto* AI = dyn_cast(&I)) { @@ -2085,129 +2082,129 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } } - - auto *BranchI = BranchInst::Create(header, newFuncRoot); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); - // newFuncRoot->getInstList().push_back(BranchI); + auto* BranchI = BranchInst::Create(header, newFuncRoot); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); + // newFuncRoot->getInstList().push_back(BranchI); - // Now sink all instructions which only have non-phi uses inside the region. - // Group the allocas at the start of the block, so that any bitcast uses of - // the allocas are well-defined. 
- AllocaInst *FirstSunkAlloca = nullptr; - for (auto *II : SinkingCands) { - if (auto *AI = dyn_cast(II)) { - AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); - if (!FirstSunkAlloca) - FirstSunkAlloca = AI; - } - } - assert((SinkingCands.empty() || FirstSunkAlloca) && "Did not expect a sink candidate without any allocas"); - for (auto *II : SinkingCands) { - if (!isa(II)) { - cast(II)->moveAfter(FirstSunkAlloca); - } - } + // Now sink all instructions which only have non-phi uses inside the region. + // Group the allocas at the start of the block, so that any bitcast uses of + // the allocas are well-defined. + AllocaInst* FirstSunkAlloca = nullptr; + for (auto* II : SinkingCands) { + if (auto* AI = dyn_cast(II)) { + AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); + if (!FirstSunkAlloca) + FirstSunkAlloca = AI; + } + } + assert((SinkingCands.empty() || FirstSunkAlloca) && "Did not expect a sink candidate without any allocas"); + for (auto* II : SinkingCands) { + if (!isa(II)) { + cast(II)->moveAfter(FirstSunkAlloca); + } + } - if (!HoistingCands.empty()) { - auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); - Instruction *TI = HoistToBlock->getTerminator(); - for (auto *II : HoistingCands) - cast(II)->moveBefore(TI); - } - // Collect objects which are inputs to the extraction region and also - // referenced by lifetime start markers within it. The effects of these - // markers must be replicated in the calling function to prevent the stack - // coloring pass from merging slots which store input objects. 
- ValueSet LifetimesStart; - eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); + if (!HoistingCands.empty()) { + auto* HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + Instruction* TI = HoistToBlock->getTerminator(); + for (auto* II : HoistingCands) + cast(II)->moveBefore(TI); + } + // Collect objects which are inputs to the extraction region and also + // referenced by lifetime start markers within it. The effects of these + // markers must be replicated in the calling function to prevent the stack + // coloring pass from merging slots which store input objects. + ValueSet LifetimesStart; + eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); - - - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal = NewValues[i]; - std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User* use : Users) - if (Instruction* inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } + + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value* RewriteVal = NewValues[i]; + + std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); + for (User* use : Users) + if (Instruction* inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } - // Rewrite branches to basic blocks outside of the loop to new dummy blocks - // within the new function. This must be done before we lose track of which - // blocks were originally in the code region. - std::vector Users(header->user_begin(), header->user_end()); - for (auto &U : Users) // FIXME: KeepOldBlocks? 
- // The BasicBlock which contains the branch is not in the region - // modify the branch target to a new block - if (Instruction *I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction && - !Blocks.count(I->getParent())) - I->replaceUsesOfWith(header, newHeader); + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector Users(header->user_begin(), header->user_end()); + for (auto& U : Users) // FIXME: KeepOldBlocks? + // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (Instruction* I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction && + !Blocks.count(I->getParent())) + I->replaceUsesOfWith(header, newHeader); - CallInst *TheCall = emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs, false,VMap); - + CallInst* TheCall = emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs, false, VMap); - moveCodeToFunction(newFunction); - // Replicate the effects of any lifetime start/end markers which referenced - // input objects in the extraction region by placing markers around the call. - insertLifetimeMarkersSurroundingCall( - oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall); + moveCodeToFunction(newFunction); - // Update the branch weights for the exit block. - if (BFI && NumExitBlocks > 1) - calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); + // Replicate the effects of any lifetime start/end markers which referenced + // input objects in the extraction region by placing markers around the call. 
+ insertLifetimeMarkersSurroundingCall( + oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall); - // Loop over all of the PHI nodes in the header and exit blocks, and change - // any references to the old incoming edge to be the new incoming edge. - for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { - PHINode* PN = cast(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!Blocks.count(PN->getIncomingBlock(i))) - PN->setIncomingBlock(i, newFuncRoot); - } + // Update the branch weights for the exit block. + if (BFI && NumExitBlocks > 1) + calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); - for (BasicBlock* ExitBB : ExitBlocks) - for (PHINode& PN : ExitBB->phis()) { - Value* IncomingCodeReplacerVal = nullptr; - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - // Ignore incoming values from outside of the extracted region. - if (!Blocks.count(PN.getIncomingBlock(i))) - continue; - - // Ensure that there is only one incoming value from codeReplacer. - if (!IncomingCodeReplacerVal) { - PN.setIncomingBlock(i, codeReplacer); - IncomingCodeReplacerVal = PN.getIncomingValue(i); - } - else - assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && - "PHI has two incompatbile incoming values from codeRepl"); - } - } + // Loop over all of the PHI nodes in the header and exit blocks, and change + // any references to the old incoming edge to be the new incoming edge. 
+ for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { + PHINode* PN = cast(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!Blocks.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); + } - fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall); - + for (BasicBlock* ExitBB : ExitBlocks) + for (PHINode& PN : ExitBB->phis()) { + Value* IncomingCodeReplacerVal = nullptr; + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // Ignore incoming values from outside of the extracted region. + if (!Blocks.count(PN.getIncomingBlock(i))) + continue; + + // Ensure that there is only one incoming value from codeReplacer. + if (!IncomingCodeReplacerVal) { + PN.setIncomingBlock(i, codeReplacer); + IncomingCodeReplacerVal = PN.getIncomingValue(i); + } + else + assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && + "PHI has two incompatbile incoming values from codeRepl"); + } + } + + fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall); + } // Mark the new function `noreturn` if applicable. Terminators which resume // exception propagation are treated as returning instructions. This is to @@ -2698,36 +2695,17 @@ Function *CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCach RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); } - int b = 0; - + auto HeaderCopy = VMap.lookup(header); assert(HeaderCopy); auto *BranchI2 = BranchInst::Create(header, newRootNode); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); - // Mark the new function `noreturn` if applicable. Terminators which resume - // exception propagation are treated as returning instructions. This is to - // avoid inserting traps after calls to outlined functions which unwind. 
- bool doesNotReturn = none_of(*newFunction, [](const BasicBlock& BB) { - const Instruction* Term = BB.getTerminator(); - if (!Term) return false; // for "newFuncRoot" - return isa(Term) || isa(Term); - }); - if (doesNotReturn) - newFunction->setDoesNotReturn(); - - - LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) { - newFunction->dump(); - report_fatal_error("verification of newFunction failed!"); - }); - LLVM_DEBUG(if (verifyFunction(*oldFunction)) - report_fatal_error("verification of oldFunction failed!")); - LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC)) - report_fatal_error("Stale Asumption cache for old Function!")); + return newFunction; } + bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc, const Function &NewFunc, AssumptionCache *AC) { From 595dc0b12620a24c972f89079c9c28dca63e7f9d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 29 Nov 2021 23:26:29 -0600 Subject: [PATCH 007/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 239 -------------------- 1 file changed, 239 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 7fd180ac7d346..89de68365de3d 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -820,245 +820,6 @@ void CodeExtractor::splitReturnBlocks() { } } -/// constructFunction - make a function based on inputs and outputs, as follows: -/// f(in0, ..., inN, out0, ..., outN) -Function *CodeExtractor::constructFunction(const ValueSet &inputs, - const ValueSet &outputs, - BasicBlock *header, - BasicBlock *&newRootNode, - BasicBlock *newHeader, - Function *oldFunction, - Module *M, bool KeepOldBlocks, ValueToValueMapTy &VMap) { - LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); - LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); - - // This function returns unsigned, outputs will go back by reference. 
- switch (NumExitBlocks) { - case 0: - case 1: RetTy = Type::getVoidTy(header->getContext()); break; - case 2: RetTy = Type::getInt1Ty(header->getContext()); break; - default: RetTy = Type::getInt16Ty(header->getContext()); break; - } - - std::vector paramTy; - SmallVector VMapArg; - // Add the types of the input values to the function's argument list - for (Value *value : inputs) { - LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); - paramTy.push_back(value->getType()); VMapArg.push_back(value); - } - - // Add the types of the output values to the function's argument list. - for (Value *output : outputs) { - LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); - if (AggregateArgs) - paramTy.push_back(output->getType()); - else - paramTy.push_back(PointerType::getUnqual(output->getType())); - } - - LLVM_DEBUG({ - dbgs() << "Function type: " << *RetTy << " f("; - for (Type *i : paramTy) - dbgs() << *i << ", "; - dbgs() << ")\n"; - }); - - StructType *StructTy = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - StructTy = StructType::get(M->getContext(), paramTy); - paramTy.clear(); - paramTy.push_back(PointerType::getUnqual(StructTy)); - } - FunctionType *funcType = - FunctionType::get(RetTy, paramTy, - AllowVarArgs && oldFunction->isVarArg()); - - std::string SuffixToUse = - Suffix.empty() - ? (header->getName().empty() ? "extracted" : header->getName().str()) - : Suffix; - // Create the new function - Function *newFunction = Function::Create( - funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), - oldFunction->getName() + "." + SuffixToUse, M); - - // If the old function is no-throw, so is the new one. - if (oldFunction->doesNotThrow()) - newFunction->setDoesNotThrow(); - - // Inherit the uwtable attribute if we need to. - if (oldFunction->hasUWTable()) - newFunction->setHasUWTable(); - - // Inherit all of the target dependent attributes and white-listed - // target independent attributes. 
- // (e.g. If the extracted region contains a call to an x86.sse - // instruction we need to make sure that the extracted region has the - // "target-features" attribute allowing it to be lowered. - // FIXME: This should be changed to check to see if a specific - // attribute can not be inherited. - for (const auto &Attr : oldFunction->getAttributes().getFnAttrs()) { - if (Attr.isStringAttribute()) { - if (Attr.getKindAsString() == "thunk") - continue; - } else - switch (Attr.getKindAsEnum()) { - // Those attributes cannot be propagated safely. Explicitly list them - // here so we get a warning if new attributes are added. This list also - // includes non-function attributes. - case Attribute::Alignment: - case Attribute::AllocSize: - case Attribute::ArgMemOnly: - case Attribute::Builtin: - case Attribute::ByVal: - case Attribute::Convergent: - case Attribute::Dereferenceable: - case Attribute::DereferenceableOrNull: - case Attribute::ElementType: - case Attribute::InAlloca: - case Attribute::InReg: - case Attribute::InaccessibleMemOnly: - case Attribute::InaccessibleMemOrArgMemOnly: - case Attribute::JumpTable: - case Attribute::Naked: - case Attribute::Nest: - case Attribute::NoAlias: - case Attribute::NoBuiltin: - case Attribute::NoCapture: - case Attribute::NoMerge: - case Attribute::NoReturn: - case Attribute::NoSync: - case Attribute::NoUndef: - case Attribute::None: - case Attribute::NonNull: - case Attribute::Preallocated: - case Attribute::ReadNone: - case Attribute::ReadOnly: - case Attribute::Returned: - case Attribute::ReturnsTwice: - case Attribute::SExt: - case Attribute::Speculatable: - case Attribute::StackAlignment: - case Attribute::StructRet: - case Attribute::SwiftError: - case Attribute::SwiftSelf: - case Attribute::SwiftAsync: - case Attribute::WillReturn: - case Attribute::WriteOnly: - case Attribute::ZExt: - case Attribute::ImmArg: - case Attribute::ByRef: - case Attribute::EndAttrKinds: - case Attribute::EmptyKey: - case 
Attribute::TombstoneKey: - continue; - // Those attributes should be safe to propagate to the extracted function. - case Attribute::AlwaysInline: - case Attribute::Cold: - case Attribute::DisableSanitizerInstrumentation: - case Attribute::Hot: - case Attribute::NoRecurse: - case Attribute::InlineHint: - case Attribute::MinSize: - case Attribute::NoCallback: - case Attribute::NoDuplicate: - case Attribute::NoFree: - case Attribute::NoImplicitFloat: - case Attribute::NoInline: - case Attribute::NonLazyBind: - case Attribute::NoRedZone: - case Attribute::NoUnwind: - case Attribute::NoSanitizeCoverage: - case Attribute::NullPointerIsValid: - case Attribute::OptForFuzzing: - case Attribute::OptimizeNone: - case Attribute::OptimizeForSize: - case Attribute::SafeStack: - case Attribute::ShadowCallStack: - case Attribute::SanitizeAddress: - case Attribute::SanitizeMemory: - case Attribute::SanitizeThread: - case Attribute::SanitizeHWAddress: - case Attribute::SanitizeMemTag: - case Attribute::SpeculativeLoadHardening: - case Attribute::StackProtect: - case Attribute::StackProtectReq: - case Attribute::StackProtectStrong: - case Attribute::StrictFP: - case Attribute::UWTable: - case Attribute::VScaleRange: - case Attribute::NoCfCheck: - case Attribute::MustProgress: - case Attribute::NoProfile: - break; - } - - newFunction->addFnAttr(Attr); - } - if (newRootNode) { - newFunction->getBasicBlockList().push_back(newRootNode); - } else { - newRootNode = BasicBlock::Create(newFunction->getContext(), "newFuncRoot", newFunction); - // auto BranchI = BranchInst::Create(newRootNode, newRootNode); // FIXME - } - - - - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); - - // Rewrite all users of the inputs in the extracted region to use the - // arguments (or appropriate addressing into struct) instead. 
- for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); - Instruction *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, - "loadgep_" + inputs[i]->getName(), TI); - } else - RewriteVal = &*AI++; - - if (KeepOldBlocks) { - VMap[ inputs[i]] = RewriteVal ; - } else { - std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User* use : Users) - if (Instruction* inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } - } - - // Set names for input and output arguments. - if (!AggregateArgs) { - AI = newFunction->arg_begin(); - for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) - AI->setName(inputs[i]->getName()); - for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) - AI->setName(outputs[i]->getName()+".out"); - } - - // Rewrite branches to basic blocks outside of the loop to new dummy blocks - // within the new function. This must be done before we lose track of which - // blocks were originally in the code region. - std::vector Users(header->user_begin(), header->user_end()); - for (auto &U : Users) // FIXME: KeepOldBlocks? 
- // The BasicBlock which contains the branch is not in the region - // modify the branch target to a new block - if (Instruction *I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction && - !Blocks.count(I->getParent())) - I->replaceUsesOfWith(header, newHeader); - - return newFunction; -} - Function *CodeExtractor::constructFunction2(const ValueSet &inputs, const ValueSet &outputs, From 391ccdde6fe774cda43441e2f5e659cc94629452 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 29 Nov 2021 23:41:16 -0600 Subject: [PATCH 008/130] WIP --- .../llvm/Transforms/Utils/CodeExtractor.h | 10 ++--- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 43 +++++++++---------- 2 files changed, 24 insertions(+), 29 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 1d97a20da91da..7b4a78906eda1 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -165,11 +165,12 @@ class CodeExtractorAnalysisCache { Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ValueSet &Inputs, ValueSet &Outputs, bool KeepOldBlocks = false); - Function *extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, + void extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, ValueSet &Inputs, ValueSet &Outputs, const BlockFrequency& EntryFreq, const DenseMap &ExitWeights, const SmallPtrSet &ExitBlocks, - const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit, Function *newFunction, + const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit, + Function *oldFunction, Function *newFunction, BasicBlock * codeReplacer, BasicBlock * NewEntry, BasicBlock * newRootNode ); @@ -247,11 +248,6 @@ class CodeExtractorAnalysisCache { void severSplitPHINodesOfExits(const SmallPtrSetImpl &Exits); void splitReturnBlocks(); - Function *constructFunction(const ValueSet 
&inputs, - const ValueSet &outputs, - BasicBlock *header, - BasicBlock *&newRootNode, BasicBlock *newHeader, - Function *oldFunction, Module *M, bool KeepOldBlocks, ValueToValueMapTy &VMap); void handleParams( Function *oldFunction, Function *newFunction, diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 89de68365de3d..623ead20ad0b9 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1617,7 +1617,7 @@ void CodeExtractor::analyzeBeforeExtraction( BlockFrequency& EntryFreq, DenseMap &ExitWeights, SmallPtrSet &ExitBlocks) { BasicBlock *header = *Blocks.begin(); - Function *oldFunction = header->getParent(); + // Function *oldFunction = header->getParent(); // Calculate the entry frequency of the new function before we change the root // block. @@ -1652,8 +1652,8 @@ void CodeExtractor::analyzeBeforeExtraction( void CodeExtractor::prepareForExtraction(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs) { - BasicBlock *header = *Blocks.begin(); - Function *oldFunction = header->getParent(); + // BasicBlock *header = *Blocks.begin(); + // Function *oldFunction = header->getParent(); // If we have any return instructions in the region, split those blocks so // that the return is not in the region. @@ -1761,13 +1761,13 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Construct new function based on inputs/outputs & add allocas for all defs. - Function *newFunction = constructFunction2(inputs, outputs, header, oldFunction, oldFunction->getParent()); + Function *newFunction = constructFunction2(inputs, outputs, header, oldFunction, oldFunction->getParent()); // The new function needs a root node because other nodes can branch to the // head of the region, but the entry node of a function cannot have preds. 
BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); - BasicBlock * newRootNode=newFuncRoot; + BasicBlock *newRootNode=newFuncRoot; @@ -1828,7 +1828,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { - extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq, ExitWeights, ExitBlocks, SinkingCands, HoistingCands, CommonExit, newFunction, codeReplacer, nullptr, newRootNode); + extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq, ExitWeights, ExitBlocks, SinkingCands, HoistingCands, CommonExit, oldFunction, newFunction, codeReplacer, nullptr, newRootNode); } else { // Transforms/HotColdSplit/stale-assume-in-original-func.ll // TODO: remove assumes only after moving @@ -1845,14 +1845,12 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } - - auto* BranchI = BranchInst::Create(header, newFuncRoot); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); // newFuncRoot->getInstList().push_back(BranchI); - + // TODO: ByCopy // Now sink all instructions which only have non-phi uses inside the region. // Group the allocas at the start of the block, so that any bitcast uses of // the allocas are well-defined. @@ -1879,6 +1877,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, cast(II)->moveBefore(TI); } + // TODO: ByCopy // Collect objects which are inputs to the extraction region and also // referenced by lifetime start markers within it. 
The effects of these // markers must be replicated in the calling function to prevent the stack @@ -1889,9 +1888,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - - - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { Value* RewriteVal = NewValues[i]; @@ -1921,21 +1917,25 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - CallInst* TheCall = emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs, false, VMap); moveCodeToFunction(newFunction); + + // TODO: ByCopy // Replicate the effects of any lifetime start/end markers which referenced // input objects in the extraction region by placing markers around the call. insertLifetimeMarkersSurroundingCall( oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall); + + // TODO: ByCopy // Update the branch weights for the exit block. if (BFI && NumExitBlocks > 1) calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); + // Loop over all of the PHI nodes in the header and exit blocks, and change // any references to the old incoming edge to be the new incoming edge. 
for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { @@ -1990,15 +1990,16 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } -Function *CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs,const BlockFrequency& EntryFreq, +void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs,const BlockFrequency& EntryFreq, const DenseMap &ExitWeights, const SmallPtrSet &ExitBlocks, - const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit,Function *newFunction, + const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit, + Function *oldFunction, Function *newFunction, BasicBlock * codeReplacer, BasicBlock * NewEntry, BasicBlock * newRootNode ) { // Assumption: this is a single-entry code region, and the header is the first block in the region. BasicBlock *header = *Blocks.begin(); - Function *oldFunction = header->getParent(); + @@ -2395,12 +2396,12 @@ Function *CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCach // allocas output values are stored in are only in-use in the codeRepl block. 
insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); - auto TheCall =call; + // auto TheCall =call; #endif // Function *oldFunc =oldFunction; - Function::BasicBlockListType &oldBlocks = oldFunction->getBasicBlockList(); - Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); + // Function::BasicBlockListType &oldBlocks = oldFunction->getBasicBlockList(); + // Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); #if 0 DebugInfoFinder DIFinder; @@ -2461,12 +2462,10 @@ Function *CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCach assert(HeaderCopy); auto *BranchI2 = BranchInst::Create(header, newRootNode); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); - - - return newFunction; } + bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc, const Function &NewFunc, AssumptionCache *AC) { From bfcb05fdbd80c631cd9d32ebd4af1e34a2e07c76 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 1 Dec 2021 13:50:23 -0600 Subject: [PATCH 009/130] WIP --- llvm/include/llvm/Transforms/IPO.h | 4 +- llvm/lib/Transforms/IPO/BlockExtractor.cpp | 25 +++--- llvm/lib/Transforms/IPO/IROutliner.cpp | 44 +++++---- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 60 ++++++++----- .../Transforms/IROutliner/outlining-calls.ll | 10 ++- .../llvm-extract/extract-block-nonorphan.ll | 89 +++++++++++++++++++ llvm/test/tools/llvm-extract/extract-block.ll | 1 + .../extract-blocks-with-groups.ll | 3 +- llvm/tools/llvm-extract/llvm-extract.cpp | 15 +++- 9 files changed, 196 insertions(+), 55 deletions(-) create mode 100644 llvm/test/tools/llvm-extract/extract-block-nonorphan.ll diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index 67b9a93c47b21..bd8e6a95ea253 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -192,11 +192,11 @@ Pass *createSingleLoopExtractorPass(); ModulePass *createBlockExtractorPass(); ModulePass 
* createBlockExtractorPass(const SmallVectorImpl &BlocksToExtract, - bool EraseFunctions); + bool EraseFunctions, bool KeepOldBlocks); ModulePass * createBlockExtractorPass(const SmallVectorImpl> &GroupsOfBlocksToExtract, - bool EraseFunctions); + bool EraseFunctions, bool KeepOldBlocks); /// createStripDeadPrototypesPass - This pass removes any function declarations /// (prototypes) that are not used. diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 7c178f9a98345..5ff8e9326286b 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -43,7 +43,7 @@ static cl::opt namespace { class BlockExtractor { public: - BlockExtractor(bool EraseFunctions) : EraseFunctions(EraseFunctions) {} + BlockExtractor(bool EraseFunctions, bool KeepOldBlocks) : EraseFunctions(EraseFunctions),KeepOldBlocks(KeepOldBlocks) {} bool runOnModule(Module &M); void init(const SmallVectorImpl> &GroupsOfBlocksToExtract) { @@ -60,6 +60,7 @@ class BlockExtractor { private: SmallVector, 4> GroupsOfBlocks; bool EraseFunctions; + bool KeepOldBlocks; /// Map a function name to groups of blocks. SmallVector>, 4> BlocksByName; @@ -75,8 +76,8 @@ class BlockExtractorLegacyPass : public ModulePass { public: static char ID; BlockExtractorLegacyPass(const SmallVectorImpl &BlocksToExtract, - bool EraseFunctions) - : ModulePass(ID), BE(EraseFunctions) { + bool EraseFunctions, bool KeepOldBlocks) + : ModulePass(ID), BE(EraseFunctions, KeepOldBlocks) { // We want one group per element of the input list. 
SmallVector, 4> MassagedGroupsOfBlocks; for (BasicBlock *BB : BlocksToExtract) { @@ -89,13 +90,13 @@ class BlockExtractorLegacyPass : public ModulePass { BlockExtractorLegacyPass(const SmallVectorImpl> &GroupsOfBlocksToExtract, - bool EraseFunctions) - : ModulePass(ID), BE(EraseFunctions) { + bool EraseFunctions, bool KeepOldBlocks) + : ModulePass(ID), BE(EraseFunctions,KeepOldBlocks) { BE.init(GroupsOfBlocksToExtract); } BlockExtractorLegacyPass() - : BlockExtractorLegacyPass(SmallVector(), false) {} + : BlockExtractorLegacyPass(SmallVector(), false, false) {} }; } // end anonymous namespace @@ -108,14 +109,14 @@ ModulePass *llvm::createBlockExtractorPass() { return new BlockExtractorLegacyPass(); } ModulePass *llvm::createBlockExtractorPass( - const SmallVectorImpl &BlocksToExtract, bool EraseFunctions) { - return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions); + const SmallVectorImpl &BlocksToExtract, bool EraseFunctions, bool KeepOldBlocks) { + return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions, KeepOldBlocks); } ModulePass *llvm::createBlockExtractorPass( const SmallVectorImpl> &GroupsOfBlocksToExtract, - bool EraseFunctions) { - return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions); + bool EraseFunctions, bool KeepOldBlocks) { + return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions,KeepOldBlocks); } /// Gets all of the blocks specified in the input file. 
@@ -223,7 +224,7 @@ bool BlockExtractor::runOnModule(Module &M) { Changed = true; } CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); - Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC); + Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC, KeepOldBlocks); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() << "' in: " << F->getName() << '\n'); @@ -254,7 +255,7 @@ bool BlockExtractorLegacyPass::runOnModule(Module &M) { PreservedAnalyses BlockExtractorPass::run(Module &M, ModuleAnalysisManager &AM) { - BlockExtractor BE(false); + BlockExtractor BE(false,false); BE.init(SmallVector, 0>()); return BE.runOnModule(M) ? PreservedAnalyses::none() : PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index d3edf55d7f445..13c9f067d6b01 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -60,7 +60,7 @@ static cl::opt NoCostModel( static cl::opt -KeepOldBlocks("iroutline-copy", cl::init(false), +KeepOldBlocks("ir-outlining-copy", cl::init(false), cl::Hidden, cl::desc("Copy instead of moving instructions from original function.")); @@ -242,7 +242,7 @@ void OutlinableRegion::splitCandidate() { FollowBB = nullptr; } -void OutlinableRegion::reattachCandidate() { +void OutlinableRegion::reattachCandidate() { assert(CandidateSplit && "Candidate is not split!"); // The basic block gets reattached like so: @@ -270,22 +270,32 @@ void OutlinableRegion::reattachCandidate() { assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!"); PrevBB->getTerminator()->eraseFromParent(); - moveBBContents(*StartBB, *PrevBB); - - BasicBlock *PlacementBB = PrevBB; - if (StartBB != EndBB) - PlacementBB = EndBB; - if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) { - assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!"); - assert(PlacementBB->getTerminator() && "Terminator 
removed from EndBB!"); - PlacementBB->getTerminator()->eraseFromParent(); - moveBBContents(*FollowBB, *PlacementBB); - PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); - FollowBB->eraseFromParent(); - } - PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB); - StartBB->eraseFromParent(); + moveBBContents(*StartBB, *PrevBB); + + BasicBlock* PlacementBB = PrevBB; + if (StartBB != EndBB) + PlacementBB = EndBB; + if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) { + assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!"); + assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!"); + //for (auto Pred : predecessors(FollowBB)) { + // if (Pred == PlacementBB) continue; + // Pred->replaceSuccessorsPhiUsesWith(FollowBB,nullptr); + //} + PlacementBB->getTerminator()->eraseFromParent(); + moveBBContents(*FollowBB, *PlacementBB); + PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); + //FollowBB->replaceAllUsesWith(UndefValue::get(FollowBB->getType())); + for (auto &&U : make_early_inc_range( FollowBB->uses())) { + U.set(UndefValue::get(FollowBB->getType())); + } + FollowBB->eraseFromParent(); + } + + PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB); + StartBB->eraseFromParent(); + // Make sure to save changes back to the StartBB. 
StartBB = PrevBB; diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 623ead20ad0b9..a21cb312ed92d 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -2045,7 +2045,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE RewriteVal = &*AI++; if (KeepOldBlocks) { - VMap[ inputs[i]] = RewriteVal ; + VMap[inputs[i]] = RewriteVal ; } else { std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); for (User* use : Users) @@ -2064,17 +2064,20 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE AI->setName(outputs[i]->getName()+".out"); } - // Rewrite branches to basic blocks outside of the loop to new dummy blocks - // within the new function. This must be done before we lose track of which - // blocks were originally in the code region. - std::vector Users(header->user_begin(), header->user_end()); - for (auto &U : Users) // FIXME: KeepOldBlocks? - // The BasicBlock which contains the branch is not in the region - // modify the branch target to a new block - if (Instruction *I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction && - !Blocks.count(I->getParent())) - I->replaceUsesOfWith(header, newHeader); + header->getParent()->viewCFG(); + if (!KeepOldBlocks) { + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector Users(header->user_begin(), header->user_end()); + for (auto& U : Users) // FIXME: KeepOldBlocks? 
+ // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (Instruction* I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction && + !Blocks.count(I->getParent())) + I->replaceUsesOfWith(header, newHeader); + } //return newFunction; @@ -2226,11 +2229,14 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE outputs[i]->getName() + ".reload", codeReplacer); Reloads.push_back(load); - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast(Users[u]); - if (!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); + + if (!KeepOldBlocks) { + std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction* inst = cast(Users[u]); + if (!Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); + } } } @@ -2394,7 +2400,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE // Insert lifetime markers around the reloads of any output values. The // allocas output values are stored in are only in-use in the codeRepl block. - insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); + // insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); // auto TheCall =call; #endif @@ -2434,9 +2440,23 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE // Note return instructions for the caller. 
// if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) // Returns.push_back(RI); + + + for (auto&& P : CBB->phis()) { + auto NumIncoming = P.getNumIncomingValues(); + for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { + if (Blocks.count(P.getIncomingBlock(Idx))) + continue; + P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/ false); + } + } } + for (BasicBlock* Block : Blocks) { + + } + for (auto Pred : predecessors(header)) { VMap[Pred] = newRootNode; @@ -2458,9 +2478,9 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE } - auto HeaderCopy = VMap.lookup(header); + BasicBlock* HeaderCopy = cast( VMap.lookup(header)); assert(HeaderCopy); - auto *BranchI2 = BranchInst::Create(header, newRootNode); + auto *BranchI2 = BranchInst::Create(HeaderCopy, newRootNode); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); } diff --git a/llvm/test/Transforms/IROutliner/outlining-calls.ll b/llvm/test/Transforms/IROutliner/outlining-calls.ll index 4ed7ea69a02d6..fadf6ef0e1594 100644 --- a/llvm/test/Transforms/IROutliner/outlining-calls.ll +++ b/llvm/test/Transforms/IROutliner/outlining-calls.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -iroutline-copy < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -ir-outlining-copy < %s | FileCheck %s --check-prefixes=CHECK,KEEP ; This test checks that we do can outline calls, but only if they have the same ; function type and the same name. @@ -16,6 +16,8 @@ define void @function1() { ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) ; CHECK-NEXT: ret void +; KEEP: entry_to_outline: ; No predecessors! 
+; KEEP: br label undef ; entry: %a = alloca i32, align 4 @@ -39,6 +41,8 @@ define void @function2() { ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) ; CHECK-NEXT: ret void +; KEEP: entry_to_outline: ; No predecessors! +; KEEP: br label undef ; entry: %a = alloca i32, align 4 @@ -68,6 +72,8 @@ define void @function3() { ; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[B]], align 4 ; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[C]], align 4 ; CHECK-NEXT: ret void +; KEEP: entry_to_outline: ; No predecessors! +; KEEP: br label undef ; entry: %a = alloca i32, align 4 @@ -92,3 +98,5 @@ entry: ; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 ; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 ; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 +; KEEP: entry_to_outline: ; No predecessors! +; KEEP: br label undef diff --git a/llvm/test/tools/llvm-extract/extract-block-nonorphan.ll b/llvm/test/tools/llvm-extract/extract-block-nonorphan.ll new file mode 100644 index 0000000000000..d9a8c443c62a8 --- /dev/null +++ b/llvm/test/tools/llvm-extract/extract-block-nonorphan.ll @@ -0,0 +1,89 @@ +; RUN: llvm-extract -S -bb "foo:region_start;extractonly;both;fallback;region_end" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s + + +; CHECK-LABEL: define void @foo( +; +; CHECK: outsideonly: +; CHECK-NEXT: store i32 0, i32* %arg, align 4 +; CHECK-NEXT: br label %both +; +; CHECK: codeRepl: +; CHECK-NEXT: call void @foo.region_start(i32* %arg) +; CHECK-NEXT: br label %return +; +; CHECK: extractonly: +; CHECK-NEXT: store i32 1, i32* %arg, align 4 +; CHECK-NEXT: br label %both +; +; CHECK: both: +; CHECK-NEXT: %dest = phi i8 [ 0, %outsideonly ], [ 1, %extractonly ] +; CHECK-NEXT: switch + + +; CHECK-LABEL: define internal void @foo.region_start(i32* %arg) { +; CHECK: br label %region_start +; +; CHECK: return.exitStub: +; CHECK-NEXT: ret void +; CHECK-EMPTY: +; CHECK-NEXT: 
region_start: +; CHECK-NEXT: br label %extractonly +; CHECK-EMPTY: +; CHECK-NEXT: extractonly: +; CHECK-NEXT: store i32 1, i32* %arg, align 4 +; CHECK-NEXT: br label %both +; CHECK-EMPTY: +; CHECK-NEXT: both: +; CHECK-NEXT: %dest = phi i8 [ 1, %extractonly ] +; CHECK-NEXT: switch i8 %dest, label %fallback [ +; CHECK-NEXT: i8 0, label %return.exitStub +; CHECK-NEXT: i8 1, label %region_end +; CHECK-NEXT: ] +; CHECK-EMPTY: +; CHECK-NEXT: fallback: +; CHECK-NEXT: unreachable +; CHECK-EMPTY: +; CHECK-NEXT: region_end: +; CHECK-NEXT: br label %return.exitStub +; CHECK-NEXT: } + + +define void @foo(i32* %arg, i1 %c) { +entry: + br i1 %c, label %region_start, label %outsideonly + +outsideonly: + store i32 0, i32* %arg, align 4 + br label %both + +region_start: + ;store i32 %arg, i32* %tmp1, align 4 + ;%tmp2 = load i32, i32* %tmp1, align 4 + ;%tmp3 = icmp sgt i32 %tmp2, 0 + ;store i8 1, i8* %dest, align 4 + ;br i1 %tmp3, label %extractonly, label %bb7 + br label %extractonly + +extractonly: + store i32 1, i32* %arg, align 4 + br label %both + +both: + %dest = phi i8 [0, %outsideonly], [1, %extractonly] + switch i8 %dest, label %fallback [ + i8 0, label %return + i8 1, label %region_end + ] + +fallback: + unreachable + +region_end: + br label %return + +outsidecont: + br label %return + +return: + ret void +} diff --git a/llvm/test/tools/llvm-extract/extract-block.ll b/llvm/test/tools/llvm-extract/extract-block.ll index 7cf0f16033794..614c670032bc4 100644 --- a/llvm/test/tools/llvm-extract/extract-block.ll +++ b/llvm/test/tools/llvm-extract/extract-block.ll @@ -1,4 +1,5 @@ ; RUN: llvm-extract -S -bb foo:bb4 %s | FileCheck %s +; RUN: llvm-extract -S -bb foo:bb4 %s --keep-blocks | FileCheck %s --check-prefix=KEEP ; CHECK: declare void @bar() define void @bar() { diff --git a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll index 90c92aa61fd8f..36e51579fa917 100644 --- 
a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll +++ b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll @@ -1,4 +1,5 @@ -; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s | FileCheck %s +; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s | FileCheck %s +; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s --keep-blocks | FileCheck %s --check-prefix=KEEP ; Extract two groups of basic blocks in two different functions. diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index cb1c4116ff192..189513123477b 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -84,7 +84,7 @@ static cl::list ExtractBlocks( "Specify pairs to extract.\n" "Each pair will create a function.\n" "If multiple basic blocks are specified in one pair,\n" - "the first block in the sequence should dominate the rest.\n" + "the first block in the sequence should dominate the rest (unless using --bb-keep-blocks)." "eg:\n" " --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n" " --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one " @@ -92,6 +92,17 @@ static cl::list ExtractBlocks( cl::ZeroOrMore, cl::value_desc("function:bb1[;bb2...]"), cl::cat(ExtractCat)); +static cl::opt KeepFunctions("bb-keep-functions", + cl::desc("When extracting blocks from functions, keep the original functions; extracted code is replaced by function call to new function"), + cl::cat(ExtractCat) + ); + +static cl::opt KeepBlocks("bb-keep-blocks", + cl::desc("Keep extracted blocks in original function after outlining, likely orphaned."), + cl::cat(ExtractCat) + ); + + // ExtractAlias - The alias to extract from the module. 
static cl::list ExtractAliases("alias", cl::desc("Specify alias to extract"), @@ -358,7 +369,7 @@ int main(int argc, char **argv) { } legacy::PassManager PM; - PM.add(createBlockExtractorPass(GroupOfBBs, true)); + PM.add(createBlockExtractorPass(GroupOfBBs, !KeepFunctions, KeepBlocks)); PM.run(*M); } From 25162d3c81507bdb3493cfc5db4a4f1659854c91 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 1 Dec 2021 17:33:57 -0600 Subject: [PATCH 010/130] WIP --- .../llvm/Transforms/Utils/CodeExtractor.h | 2 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 248 +++++++++++------- llvm/test/tools/llvm-extract/extract-block.ll | 11 +- .../extract-blocks-with-groups.ll | 17 +- 4 files changed, 179 insertions(+), 99 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 7b4a78906eda1..33ff7509fe963 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -170,7 +170,7 @@ class CodeExtractorAnalysisCache { const BlockFrequency& EntryFreq, const DenseMap &ExitWeights, const SmallPtrSet &ExitBlocks, const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit, - Function *oldFunction, Function *newFunction, + Function *oldFunction, Function *newFunction, BasicBlock *header, BasicBlock * codeReplacer, BasicBlock * NewEntry, BasicBlock * newRootNode ); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index a21cb312ed92d..0cbd38df1d2ae 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1727,7 +1727,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } NumExitBlocks = ExitBlocks.size(); + + // analyzis, after ret splitting + // DenseMap ExitingBlocks; for (BasicBlock *Block : Blocks) { Instruction *TI = Block->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); 
i != e; ++i) { @@ -1735,10 +1738,12 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, continue; BasicBlock *OldTarget = TI->getSuccessor(i); OldTargets.push_back(OldTarget); + // ExitingBlocks[Block] = OldTarget; } } + // canonicalization // If we have to split PHI nodes of the entry or exit blocks, do so now. severSplitPHINodesOfEntry(header); @@ -1811,7 +1816,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (!AggregateArgs) { AI = newFunction->arg_begin(); for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) - AI->setName(inputs[i]->getName()); + AI->setName(inputs[i]->getName() + ".y"); for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) AI->setName(outputs[i]->getName()+".out"); } @@ -1828,7 +1833,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { - extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq, ExitWeights, ExitBlocks, SinkingCands, HoistingCands, CommonExit, oldFunction, newFunction, codeReplacer, nullptr, newRootNode); + extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq, ExitWeights, ExitBlocks, SinkingCands, HoistingCands, CommonExit, oldFunction, newFunction,header, codeReplacer, nullptr, newRootNode); } else { // Transforms/HotColdSplit/stale-assume-in-original-func.ll // TODO: remove assumes only after moving @@ -1994,11 +1999,11 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE const DenseMap &ExitWeights, const SmallPtrSet &ExitBlocks, const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit, - Function *oldFunction, Function *newFunction, + Function *oldFunction, Function *newFunction, BasicBlock *header, BasicBlock * codeReplacer, BasicBlock * NewEntry, BasicBlock * newRootNode ) { // Assumption: this is a single-entry code region, and the header is the first block in the region. 
- BasicBlock *header = *Blocks.begin(); + // BasicBlock *header = *Blocks.begin(); @@ -2045,7 +2050,8 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE RewriteVal = &*AI++; if (KeepOldBlocks) { - VMap[inputs[i]] = RewriteVal ; + auto In = inputs[i]; + VMap[In] = RewriteVal ; } else { std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); for (User* use : Users) @@ -2054,7 +2060,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE inst->replaceUsesOfWith(inputs[i], RewriteVal); } } - +#if 0 // Set names for input and output arguments. if (!AggregateArgs) { AI = newFunction->arg_begin(); @@ -2063,7 +2069,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) AI->setName(outputs[i]->getName()+".out"); } - +#endif header->getParent()->viewCFG(); if (!KeepOldBlocks) { // Rewrite branches to basic blocks outside of the loop to new dummy blocks @@ -2211,26 +2217,33 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE if (!AggregateArgs) std::advance(OutputArgBegin, inputs.size()); + DenseMap ReloadReplacements; + // Reload the outputs passed in by reference. 
for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = nullptr; + Value* Output = nullptr; if (AggregateArgs) { - Value *Idx[2]; + Value* Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( + GetElementPtrInst* GEP = GetElementPtrInst::Create( StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; - } else { + } + else { Output = ReloadOutputs[i]; } - LoadInst *load = new LoadInst(outputs[i]->getType(), Output, + LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); Reloads.push_back(load); - if (!KeepOldBlocks) { + if (KeepOldBlocks) { + auto OrigOut = outputs[i]; + //VMap[Out] = load; + ReloadReplacements[OrigOut] = load; + } else { std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { Instruction* inst = cast(Users[u]); @@ -2259,38 +2272,87 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE if (Blocks.count(OldTarget)) continue; BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) - continue; + if (NewTarget) continue; - // If we don't already have an exit stub for this non-extracted - // destination, create one now! - NewTarget = BasicBlock::Create(Context, - OldTarget->getName() + ".exitStub", - newFunction); - VMap[OldTarget] = NewTarget; - unsigned SuccNum = switchVal++; - - Value *brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: break; // No value needed. 
- case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); - break; - } - - ReturnInst::Create(Context, brVal, NewTarget); + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", + newFunction); + VMap[OldTarget] = NewTarget; + unsigned SuccNum = switchVal++; + + Value* brVal = nullptr; + assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); + switch (NumExitBlocks) { + case 0: + case 1: break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } - // Update the switch instruction. - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), - SuccNum), - OldTarget); + ReturnInst::Create(Context, brVal, NewTarget); + + auto OldPredecessor = OldTarget->getUniquePredecessor(); + + + // Update the switch instruction. 
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), + OldTarget); + + if (KeepOldBlocks) { + // for (auto T : OldTargets) { + DenseMap OutRepl; + for (auto&& P : OldTarget->phis()) { + int NumIncoming = P.getNumIncomingValues(); + for (int i = 0; i < NumIncoming; ++i) { + auto OldVal = P.getIncomingValue(i); + auto ReplVal = ReloadReplacements.lookup(OldVal); + if (ReplVal) { + P.addIncoming(ReplVal,codeReplacer); + OutRepl[OldVal] =& P; + break; + } + } + } + //} + + if (OldPredecessor) { + for (auto&& O : outputs) { + auto& PHI = OutRepl[O]; + + for (auto&& U : make_early_inc_range(O->uses())) { + auto *User = dyn_cast(U.getUser()); + if (!User) continue; + if (Blocks.count(User->getParent())) continue; + // if (User->getParent() == OldTarget && isa(User)) continue; + if (auto P = dyn_cast(User)) { + auto Incoming = P->getIncomingBlock(U.getOperandNo()); + if (Incoming == codeReplacer || Blocks.count(Incoming)) continue; + } + + if (!PHI) { + auto ReplVal = ReloadReplacements.lookup(O); + PHI = PHINode::Create(O->getType(), 2, O->getName() + ".merge_new_and_old", OldTarget->getFirstNonPHI()); + PHI->addIncoming(O, OldPredecessor); + PHI->addIncoming(ReplVal, codeReplacer); + } + + U.set(PHI); + } + } + } + } } + + + //if (!KeepOldBlocks) #if 1 for (BasicBlock *Block : Blocks) { @@ -2310,49 +2372,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE } #endif - // Store the arguments right after the definition of output value. - // This should be proceeded after creating exit stubs to be ensure that invoke - // result restore will be placed in the outlined function. - Function::arg_iterator OAI = OutputArgBegin; - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto *OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; - - // Find proper insertion point. - BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. 
Otherwise we insert the store right after OutI. - if (auto *InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); - - Instruction *InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || - Blocks.count(InsertBefore->getParent())) && - "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), - InsertBefore); - new StoreInst(outputs[i], GEP, InsertBefore); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. - } else { - new StoreInst(outputs[i], &*OAI, InsertBefore); - ++OAI; - } - } + // Now that we've done the deed, simplify the switch instruction. Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); @@ -2453,12 +2473,11 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE } - for (BasicBlock* Block : Blocks) { - - } - + for (auto Pred : predecessors(header)) { + if (VMap.count(Pred)) + continue; VMap[Pred] = newRootNode; } @@ -2466,18 +2485,65 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE // Loop over all of the instructions in the new function, fixing up operand // references as we go. This uses VMap to do all the hard work. 
for (BasicBlock* Block : Blocks) { - auto NewBlock = VMap.lookup(Block); + WeakTrackingVH NewBlock = VMap.lookup(Block); if (!NewBlock) { continue; } - // BasicBlock *Y =NewBlock; + BasicBlock &Y = cast (*NewBlock); // Loop over all instructions, fixing each one as we find it... - for (Instruction& II : cast (*NewBlock)) + for (Instruction& II : Y) RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); } + + // Store the arguments right after the definition of output value. + // This should be proceeded after creating exit stubs to be ensure that invoke + // result restore will be placed in the outlined function. + Function::arg_iterator OAI = OutputArgBegin; + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto *OutI = dyn_cast(outputs[i]); + if (!OutI) + continue; + OutI = cast(VMap.lookup(OutI)); + + // Find proper insertion point. + BasicBlock::iterator InsertPt; + // In case OutI is an invoke, we insert the store at the beginning in the + // 'normal destination' BB. Otherwise we insert the store right after OutI. 
+ if (auto *InvokeI = dyn_cast(OutI)) + InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); + else if (auto *Phi = dyn_cast(OutI)) + InsertPt = Phi->getParent()->getFirstInsertionPt(); + else + InsertPt = std::next(OutI->getIterator()); + + Instruction *InsertBefore = &*InsertPt; + assert((InsertBefore->getFunction() == newFunction || + Blocks.count(InsertBefore->getParent())) && + "InsertPt should be in new function"); + assert(OAI != newFunction->arg_end() && + "Number of output arguments should match " + "the amount of defined values"); + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), + InsertBefore); + new StoreInst(OutI, GEP, InsertBefore); + // Since there should be only one struct argument aggregating + // all the output values, we shouldn't increment OAI, which always + // points to the struct argument, in this case. 
+ } else { + new StoreInst(OutI, &*OAI, InsertBefore); + ++OAI; + } + } + + BasicBlock* HeaderCopy = cast( VMap.lookup(header)); assert(HeaderCopy); auto *BranchI2 = BranchInst::Create(HeaderCopy, newRootNode); diff --git a/llvm/test/tools/llvm-extract/extract-block.ll b/llvm/test/tools/llvm-extract/extract-block.ll index 614c670032bc4..4849e1e917783 100644 --- a/llvm/test/tools/llvm-extract/extract-block.ll +++ b/llvm/test/tools/llvm-extract/extract-block.ll @@ -1,5 +1,6 @@ -; RUN: llvm-extract -S -bb foo:bb4 %s | FileCheck %s -; RUN: llvm-extract -S -bb foo:bb4 %s --keep-blocks | FileCheck %s --check-prefix=KEEP +; RUN: llvm-extract -S -bb foo:bb4 %s | FileCheck %s --check-prefixes=CHECK,KILL +; RUN: llvm-extract -S -bb foo:bb4 %s --bb-keep-functions --bb-keep-blocks | FileCheck %s --check-prefixes=CHECK,KEEP + ; CHECK: declare void @bar() define void @bar() { @@ -13,7 +14,11 @@ bb: ret void } -; CHECK: @foo.bb4 +; KEEP-LABEL: define i32 @foo(i32 %arg) { +; KEEP: call void @foo.bb4 + +; KILL-LABEL: define dso_local void @foo.bb4( +; KEEP-LABEL: define internal void @foo.bb4( ; CHECK: call void @bar() ; CHECK: %tmp5 define i32 @foo(i32 %arg) { diff --git a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll index 36e51579fa917..94e91c5c54c21 100644 --- a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll +++ b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll @@ -1,11 +1,15 @@ -; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s | FileCheck %s -; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s --keep-blocks | FileCheck %s --check-prefix=KEEP +; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s | FileCheck %s --check-prefixes=CHECK,KILL +; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s --bb-keep-functions --bb-keep-blocks | FileCheck %s --check-prefixes=CHECK,KEEP ; Extract two groups of basic blocks in two 
different functions. +; KEEP-LABEL: define i32 @foo(i32 %arg, i32 %arg1) { +; KEEP: call @foo.if.split( + ; The first extracted function is the region composed by the ; blocks if, then, and else from foo. -; CHECK: define dso_local void @foo.if.split(i32 %arg1, i32 %arg, i32* %tmp.0.ce.out) { +; KILL-LABEL: define dso_local void @foo.if.split(i32 %arg1, i32 %arg, i32* %tmp.0.ce.out) { +; KEEP-LABEL: define internal void @foo.if.split(i32 %arg1, i32 %arg, i32* %tmp.0.ce.out) { ; CHECK: newFuncRoot: ; CHECK: br label %if.split ; @@ -35,9 +39,14 @@ ; CHECK: ret void ; CHECK: } + +; KEEP-LABEL: define i32 @bar(i32 %arg, i32 %arg1) { +; KEEP: call @bar.bb14( + ; The second extracted function is the region composed by the blocks ; bb14 and bb20 from bar. -; CHECK: define dso_local i1 @bar.bb14(i32 %arg1, i32 %arg, i32* %tmp25.out) { +; KILL-LABEL: define dso_local i1 @bar.bb14(i32 %arg1, i32 %arg, i32* %tmp25.out) { +; KEEP-LABEL: define dso_local i1 @bar.bb14(i32 %arg1, i32 %arg, i32* %tmp25.out) { ; CHECK: newFuncRoot: ; CHECK: br label %bb14 ; From 8aa60615e6cd8725a447a5cbdd67df55ffe2d419 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 1 Dec 2021 17:52:14 -0600 Subject: [PATCH 011/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 73 ++++++++++--------- .../llvm-extract/extract-block-nonorphan.ll | 8 +- .../extract-blocks-with-groups.ll | 29 ++++---- 3 files changed, 57 insertions(+), 53 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 0cbd38df1d2ae..5785eff04cc3a 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1816,7 +1816,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (!AggregateArgs) { AI = newFunction->arg_begin(); for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) - AI->setName(inputs[i]->getName() + ".y"); + AI->setName(inputs[i]->getName()); for (unsigned i = 0, e = 
outputs.size(); i != e; ++i, ++AI) AI->setName(outputs[i]->getName()+".out"); } @@ -2258,6 +2258,43 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), codeReplacer, 0, codeReplacer); + + auto newFuncIt = newFunction->front().getIterator(); + for (BasicBlock *Block : Blocks) { + BasicBlock* CBB = CloneBasicBlock(Block, VMap, {}, newFunction /*, nullptr, &DIFinder*/); + + // Add basic block mapping. + VMap[Block] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (Block->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(oldFunction,Block); + VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); + } + + // Note return instructions for the caller. + // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) + // Returns.push_back(RI); + + + for (auto&& P : CBB->phis()) { + auto NumIncoming = P.getNumIncomingValues(); + for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { + if (Blocks.count(P.getIncomingBlock(Idx))) + continue; + P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/ false); + } + } + } + + + + // Since there may be multiple exits from the original region, make the new // function return an unsigned, switch on that number. 
This loop iterates // over all of the blocks in the extracted region, updating any terminator @@ -2439,41 +2476,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE #endif - auto newFuncIt = newFunction->front().getIterator(); - for (BasicBlock *Block : Blocks) { - BasicBlock* CBB = CloneBasicBlock(Block, VMap, {}, newFunction /*, nullptr, &DIFinder*/); - // Add basic block mapping. - VMap[Block] = CBB; - - // It is only legal to clone a function if a block address within that - // function is never referenced outside of the function. Given that, we - // want to map block addresses from the old function to block addresses in - // the clone. (This is different from the generic ValueMapper - // implementation, which generates an invalid blockaddress when - // cloning a function.) - if (Block->hasAddressTaken()) { - Constant *OldBBAddr = BlockAddress::get(oldFunction,Block); - VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); - } - - // Note return instructions for the caller. 
- // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) - // Returns.push_back(RI); - - - for (auto&& P : CBB->phis()) { - auto NumIncoming = P.getNumIncomingValues(); - for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { - if (Blocks.count(P.getIncomingBlock(Idx))) - continue; - P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/ false); - } - } - } - - - for (auto Pred : predecessors(header)) { if (VMap.count(Pred)) diff --git a/llvm/test/tools/llvm-extract/extract-block-nonorphan.ll b/llvm/test/tools/llvm-extract/extract-block-nonorphan.ll index d9a8c443c62a8..47b6821aefa32 100644 --- a/llvm/test/tools/llvm-extract/extract-block-nonorphan.ll +++ b/llvm/test/tools/llvm-extract/extract-block-nonorphan.ll @@ -23,10 +23,7 @@ ; CHECK-LABEL: define internal void @foo.region_start(i32* %arg) { ; CHECK: br label %region_start ; -; CHECK: return.exitStub: -; CHECK-NEXT: ret void -; CHECK-EMPTY: -; CHECK-NEXT: region_start: +; CHECK: region_start: ; CHECK-NEXT: br label %extractonly ; CHECK-EMPTY: ; CHECK-NEXT: extractonly: @@ -45,6 +42,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: region_end: ; CHECK-NEXT: br label %return.exitStub +; CHECK-EMPTY: +; CHECK-NEXT: return.exitStub: +; CHECK-NEXT: ret void ; CHECK-NEXT: } diff --git a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll index 94e91c5c54c21..8ee1c69038c98 100644 --- a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll +++ b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll @@ -4,7 +4,11 @@ ; KEEP-LABEL: define i32 @foo(i32 %arg, i32 %arg1) { -; KEEP: call @foo.if.split( +; KEEP: call void @foo.if.split( + +; KEEP-LABEL: define i32 @bar(i32 %arg, i32 %arg1) { +; KEEP: %targetBlock = call i1 @bar.bb14( + ; The first extracted function is the region composed by the ; blocks if, then, and else from foo. 
@@ -13,59 +17,56 @@ ; CHECK: newFuncRoot: ; CHECK: br label %if.split ; -; CHECK: then: ; preds = %if.split +; CHECK: then: ; CHECK: %tmp12 = shl i32 %arg1, 2 ; CHECK: %tmp13 = add nsw i32 %tmp12, %arg ; CHECK: br label %end.split ; -; CHECK: else: ; preds = %if.split +; CHECK: else: ; CHECK: %tmp22 = mul nsw i32 %arg, 3 ; CHECK: %tmp24 = sdiv i32 %arg1, 6 ; CHECK: %tmp25 = add nsw i32 %tmp24, %tmp22 ; CHECK: br label %end.split ; -; CHECK: if.split: ; preds = %newFuncRoot +; CHECK: if.split: ; CHECK: %tmp5 = icmp sgt i32 %arg, 0 ; CHECK: %tmp8 = icmp sgt i32 %arg1, 0 ; CHECK: %or.cond = and i1 %tmp5, %tmp8 ; CHECK: br i1 %or.cond, label %then, label %else ; -; CHECK: end.split: ; preds = %then, %else +; CHECK: end.split: ; CHECK: %tmp.0.ce = phi i32 [ %tmp13, %then ], [ %tmp25, %else ] ; CHECK: store i32 %tmp.0.ce, i32* %tmp.0.ce.out ; CHECK: br label %end.exitStub ; -; CHECK: end.exitStub: ; preds = %end.split +; CHECK: end.exitStub: ; CHECK: ret void ; CHECK: } -; KEEP-LABEL: define i32 @bar(i32 %arg, i32 %arg1) { -; KEEP: call @bar.bb14( - ; The second extracted function is the region composed by the blocks ; bb14 and bb20 from bar. 
; KILL-LABEL: define dso_local i1 @bar.bb14(i32 %arg1, i32 %arg, i32* %tmp25.out) { -; KEEP-LABEL: define dso_local i1 @bar.bb14(i32 %arg1, i32 %arg, i32* %tmp25.out) { +; KEEP-LABEL: define internal i1 @bar.bb14(i32 %arg1, i32 %arg, i32* %tmp25.out) { ; CHECK: newFuncRoot: ; CHECK: br label %bb14 ; -; CHECK: bb14: ; preds = %newFuncRoot +; CHECK: bb14: ; CHECK: %tmp0 = and i32 %arg1, %arg ; CHECK: %tmp1 = icmp slt i32 %tmp0, 0 ; CHECK: br i1 %tmp1, label %bb20, label %bb26.exitStub ; -; CHECK: bb20: ; preds = %bb14 +; CHECK: bb20: ; CHECK: %tmp22 = mul nsw i32 %arg, 3 ; CHECK: %tmp24 = sdiv i32 %arg1, 6 ; CHECK: %tmp25 = add nsw i32 %tmp24, %tmp22 ; CHECK: store i32 %tmp25, i32* %tmp25.out ; CHECK: br label %bb30.exitStub ; -; CHECK: bb26.exitStub: ; preds = %bb14 +; CHECK: bb26.exitStub: ; CHECK: ret i1 true ; -; CHECK: bb30.exitStub: ; preds = %bb20 +; CHECK: bb30.exitStub: ; CHECK: ret i1 false ; CHECK: } From 68f6c4b05da93df0e9005608f7990d5b4074a4b8 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 1 Dec 2021 21:00:15 -0600 Subject: [PATCH 012/130] WIP --- llvm/lib/Transforms/IPO/IROutliner.cpp | 40 +++++++++---------- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 24 +++++------ .../Transforms/IROutliner/outlining-calls.ll | 10 +---- 3 files changed, 32 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index 13c9f067d6b01..e2796ebdcbb94 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -38,8 +38,6 @@ using namespace IRSimilarity; // matching and outlining. extern cl::opt DisableBranches; - - // Set to true if the user wants the ir outliner to run on linkonceodr linkage // functions. This is false by default because the linker can dedupe linkonceodr // functions. 
Since the outliner is confined to a single module (modulo LTO), @@ -270,32 +268,30 @@ void OutlinableRegion::reattachCandidate() { assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!"); PrevBB->getTerminator()->eraseFromParent(); + moveBBContents(*StartBB, *PrevBB); - moveBBContents(*StartBB, *PrevBB); - - BasicBlock* PlacementBB = PrevBB; - if (StartBB != EndBB) - PlacementBB = EndBB; - if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) { - assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!"); - assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!"); + BasicBlock *PlacementBB = PrevBB; + if (StartBB != EndBB) + PlacementBB = EndBB; + if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr && FollowBB->getSinglePredecessor()) { + assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!"); + assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!"); //for (auto Pred : predecessors(FollowBB)) { // if (Pred == PlacementBB) continue; // Pred->replaceSuccessorsPhiUsesWith(FollowBB,nullptr); //} - PlacementBB->getTerminator()->eraseFromParent(); - moveBBContents(*FollowBB, *PlacementBB); - PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); - //FollowBB->replaceAllUsesWith(UndefValue::get(FollowBB->getType())); - for (auto &&U : make_early_inc_range( FollowBB->uses())) { - U.set(UndefValue::get(FollowBB->getType())); - } - FollowBB->eraseFromParent(); - } + PlacementBB->getTerminator()->eraseFromParent(); + moveBBContents(*FollowBB, *PlacementBB); + PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); + //FollowBB->replaceAllUsesWith(UndefValue::get(FollowBB->getType())); +// for (auto &&U : make_early_inc_range( FollowBB->uses())) { +// U.set(UndefValue::get(FollowBB->getType())); +// } + FollowBB->eraseFromParent(); + } - PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB); - StartBB->eraseFromParent(); - + 
PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB); + StartBB->eraseFromParent(); // Make sure to save changes back to the StartBB. StartBB = PrevBB; diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 5785eff04cc3a..6f47dd986f4b3 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1830,6 +1830,18 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); } + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector Users(header->user_begin(), header->user_end()); + for (auto& U : Users) // FIXME: KeepOldBlocks? + // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (Instruction* I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction && + !Blocks.count(I->getParent())) + I->replaceUsesOfWith(header, newHeader); + if (KeepOldBlocks) { @@ -1906,18 +1918,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - // Rewrite branches to basic blocks outside of the loop to new dummy blocks - // within the new function. This must be done before we lose track of which - // blocks were originally in the code region. - std::vector Users(header->user_begin(), header->user_end()); - for (auto& U : Users) // FIXME: KeepOldBlocks? 
- // The BasicBlock which contains the branch is not in the region - // modify the branch target to a new block - if (Instruction* I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction && - !Blocks.count(I->getParent())) - I->replaceUsesOfWith(header, newHeader); - diff --git a/llvm/test/Transforms/IROutliner/outlining-calls.ll b/llvm/test/Transforms/IROutliner/outlining-calls.ll index fadf6ef0e1594..355006bdac728 100644 --- a/llvm/test/Transforms/IROutliner/outlining-calls.ll +++ b/llvm/test/Transforms/IROutliner/outlining-calls.ll @@ -15,9 +15,8 @@ define void @function1() { ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) +; KEEP: entry_after_outline: ; CHECK-NEXT: ret void -; KEEP: entry_to_outline: ; No predecessors! -; KEEP: br label undef ; entry: %a = alloca i32, align 4 @@ -40,9 +39,8 @@ define void @function2() { ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) +; KEEP: entry_after_outline: ; CHECK-NEXT: ret void -; KEEP: entry_to_outline: ; No predecessors! -; KEEP: br label undef ; entry: %a = alloca i32, align 4 @@ -72,8 +70,6 @@ define void @function3() { ; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[B]], align 4 ; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[C]], align 4 ; CHECK-NEXT: ret void -; KEEP: entry_to_outline: ; No predecessors! -; KEEP: br label undef ; entry: %a = alloca i32, align 4 @@ -98,5 +94,3 @@ entry: ; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 ; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 ; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 -; KEEP: entry_to_outline: ; No predecessors! 
-; KEEP: br label undef From 47c8395884392551837bff8e2e9ad5ccd60b02ad Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 2 Dec 2021 16:31:15 -0600 Subject: [PATCH 013/130] WIP --- llvm/include/llvm/Transforms/IPO.h | 4 +- .../llvm/Transforms/Utils/CodeExtractor.h | 43 +- llvm/lib/Transforms/IPO/BlockExtractor.cpp | 4 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 746 ++++++++++-------- .../IROutliner/outlining-multiple-exits.ll | 1 + 5 files changed, 435 insertions(+), 363 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index bd8e6a95ea253..e48677088b2ad 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -192,11 +192,11 @@ Pass *createSingleLoopExtractorPass(); ModulePass *createBlockExtractorPass(); ModulePass * createBlockExtractorPass(const SmallVectorImpl &BlocksToExtract, - bool EraseFunctions, bool KeepOldBlocks); + bool EraseFunctions, bool KeepOldBlocks =false); ModulePass * createBlockExtractorPass(const SmallVectorImpl> &GroupsOfBlocksToExtract, - bool EraseFunctions, bool KeepOldBlocks); + bool EraseFunctions, bool KeepOldBlocks=false); /// createStripDeadPrototypesPass - This pass removes any function declarations /// (prototypes) that are not used. diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 33ff7509fe963..95217e62a7fc7 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -110,12 +110,9 @@ class CodeExtractorAnalysisCache { // label, if non-empty, otherwise "extracted". 
std::string Suffix; - // bool DeleteOldBlocks; - void analyzeBeforeExtraction(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs, BlockFrequency &EntryFreq,DenseMap &ExitWeights, SmallPtrSet &ExitBlocks); - void prepareForExtraction(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs); public: /// Create a code extractor for a sequence of blocks. @@ -160,20 +157,13 @@ class CodeExtractorAnalysisCache { /// newly outlined function. /// \param Outputs [out] - filled with values marked as outputs to the /// newly outlined function. + /// \param KeepOldBlocks If true, the original instances of the extracted region remain; instead of moving them to the new function they are copied. /// \returns zero when called on a CodeExtractor instance where isEligible /// returns false. Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ValueSet &Inputs, ValueSet &Outputs, bool KeepOldBlocks = false); - void extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, - ValueSet &Inputs, ValueSet &Outputs, - const BlockFrequency& EntryFreq, - const DenseMap &ExitWeights, const SmallPtrSet &ExitBlocks, - const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit, - Function *oldFunction, Function *newFunction, BasicBlock *header, - BasicBlock * codeReplacer, - BasicBlock * NewEntry, - BasicBlock * newRootNode ); + /// Verify that assumption cache isn't stale after a region is extracted. /// Returns true when verifier finds errors. 
AssumptionCache is passed as @@ -263,6 +253,26 @@ class CodeExtractorAnalysisCache { //, bool KeepOldBlocks, ValueToValueMapTy &VMap ); + void analyzeBeforeExtraction(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs, BlockFrequency &EntryFreq,DenseMap &ExitWeights, SmallPtrSet &ExitBlocks); + + + void prepareForExtraction(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs); + + + void extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, + ValueSet &Inputs, ValueSet &Outputs, + const BlockFrequency& EntryFreq, + const DenseMap &ExitWeights, const SmallPtrSet &ExitBlocks, + const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit, + Function *oldFunction, Function *newFunction, BasicBlock *header, + BasicBlock * codeReplacer, + BasicBlock * NewEntry, + BasicBlock * newRootNode , + std::vector ¶ms, + std::vector &StructValues, + SmallVectorImpl &SwiftErrorArgs + ); + void moveCodeToFunction(Function *newFunction); void calculateNewCallTerminatorWeights( @@ -270,11 +280,14 @@ class CodeExtractorAnalysisCache { DenseMap &ExitWeights, BranchProbabilityInfo *BPI); - - CallInst *emitCallAndSwitchStatement(Function *newFunction, BasicBlock *newHeader, - ValueSet &inputs, ValueSet &outputs,bool KeepOldBlocks , ValueToValueMapTy &VMap); + ValueSet &inputs, ValueSet &outputs,bool KeepOldBlocks , + ValueToValueMapTy &VMap, + std::vector ¶ms, + std::vector &StructValues, + SmallVectorImpl &SwiftErrorArgs + ); }; } // end namespace llvm diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 5ff8e9326286b..2556292177d45 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -43,7 +43,7 @@ static cl::opt namespace { class BlockExtractor { public: - BlockExtractor(bool EraseFunctions, bool KeepOldBlocks) : EraseFunctions(EraseFunctions),KeepOldBlocks(KeepOldBlocks) {} + BlockExtractor(bool 
EraseFunctions, bool KeepOldBlocks=false) : EraseFunctions(EraseFunctions),KeepOldBlocks(KeepOldBlocks) {} bool runOnModule(Module &M); void init(const SmallVectorImpl> &GroupsOfBlocksToExtract) { @@ -255,7 +255,7 @@ bool BlockExtractorLegacyPass::runOnModule(Module &M) { PreservedAnalyses BlockExtractorPass::run(Module &M, ModuleAnalysisManager &AM) { - BlockExtractor BE(false,false); + BlockExtractor BE(false); BE.init(SmallVector, 0>()); return BE.runOnModule(M) ? PreservedAnalyses::none() : PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 6f47dd986f4b3..eaed2905a40d9 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1106,7 +1106,11 @@ static void insertLifetimeMarkersSurroundingCall( CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, ValueSet &inputs, - ValueSet &outputs, bool KeepOldBlocks, ValueToValueMapTy &VMap) { + ValueSet &outputs, bool KeepOldBlocks, ValueToValueMapTy &VMap, + std::vector ¶ms, + std::vector &StructValues, + SmallVectorImpl &SwiftErrorArgs + ) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs std::vector ReloadOutputs, Reloads; @@ -1118,30 +1122,10 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, // TOOD: Pass AllocaBlock - BasicBlock *AllocaBlock ; - if (KeepOldBlocks) { - AllocaBlock = &newFunction->front(); - } else { - AllocaBlock = &codeReplacer->getParent()->front(); - } + BasicBlock * AllocaBlock = &codeReplacer->getParent()->front(); + - // Add inputs as params, or to be filled into the struct - unsigned ArgNo = 0; - std::vector params; - std::vector StructValues; - SmallVector SwiftErrorArgs; - for (Value *input : inputs) { - if (AggregateArgs) - StructValues.push_back(input); - else { - 
params.push_back(input); - if (input->isSwiftError()) - SwiftErrorArgs.push_back(ArgNo); - } - ++ArgNo; - } - // Create allocas for the outputs for (Value *output : outputs) { @@ -1764,6 +1748,22 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Find inputs to, outputs from the code region. findInputsOutputs(inputs, outputs, SinkingCands); +#if 0 + DenseMap> ExitValues; + for (auto&& O : outputs) { + auto &&I = cast(O); + for (auto &&U : I->uses()) { + auto User = dyn_cast(U.getUser()); + if (!User) continue; + if (Blocks.count(User->getParent())) continue; + + for (auto &&E : ExitBlocks) { + if (DT->dominates(E, User->getParent()) + ExitValues[E].push_back(cast O); + } + } + } +#endif // Construct new function based on inputs/outputs & add allocas for all defs. Function *newFunction = constructFunction2(inputs, outputs, header, oldFunction, oldFunction->getParent()); @@ -1821,6 +1821,24 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, AI->setName(outputs[i]->getName()+".out"); } + + // Add inputs as params, or to be filled into the struct + unsigned ArgNo = 0; + std::vector params; + std::vector StructValues; + SmallVector SwiftErrorArgs; + for (Value *input : inputs) { + if (AggregateArgs) + StructValues.push_back(input); + else { + params.push_back(input); + if (input->isSwiftError()) + SwiftErrorArgs.push_back(ArgNo); + } + ++ArgNo; + } + + // Update the entry count of the function. 
if (BFI) { auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); @@ -1845,7 +1863,11 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { - extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq, ExitWeights, ExitBlocks, SinkingCands, HoistingCands, CommonExit, oldFunction, newFunction,header, codeReplacer, nullptr, newRootNode); + extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq, ExitWeights, ExitBlocks, SinkingCands, HoistingCands, CommonExit, oldFunction, newFunction,header, codeReplacer, nullptr, newRootNode, + params, + StructValues, + SwiftErrorArgs + ); } else { // Transforms/HotColdSplit/stale-assume-in-original-func.ll // TODO: remove assumes only after moving @@ -1922,7 +1944,11 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - CallInst* TheCall = emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs, false, VMap); + CallInst* TheCall = emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs, false, VMap, + params, + StructValues, + SwiftErrorArgs + ); moveCodeToFunction(newFunction); @@ -1995,13 +2021,17 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } -void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs,const BlockFrequency& EntryFreq, - const DenseMap &ExitWeights, - const SmallPtrSet &ExitBlocks, - const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit, - Function *oldFunction, Function *newFunction, BasicBlock *header, - BasicBlock * codeReplacer, - BasicBlock * NewEntry, BasicBlock * newRootNode ) { +void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CEAC, ValueSet& inputs, ValueSet& outputs, const BlockFrequency& EntryFreq, + const DenseMap& ExitWeights, + const SmallPtrSet& ExitBlocks, + const ValueSet& SinkingCands, const ValueSet& HoistingCands, BasicBlock* CommonExit, + 
Function* oldFunction, Function* newFunction, BasicBlock* header, + BasicBlock* codeReplacer, + BasicBlock* NewEntry, BasicBlock* newRootNode, + std::vector ¶ms, + std::vector &StructValues, + SmallVectorImpl &SwiftErrorArgs +) { // Assumption: this is a single-entry code region, and the header is the first block in the region. // BasicBlock *header = *Blocks.begin(); @@ -2011,384 +2041,412 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CE - auto newHeader = codeReplacer; - ValueToValueMapTy VMap; - Module *M = oldFunction->getParent(); - auto KeepOldBlocks = true; + auto newHeader = codeReplacer; + ValueToValueMapTy VMap; + Module* M = oldFunction->getParent(); + auto KeepOldBlocks = true; - - // TODO: Make StructTy a field - StructType *StructTy = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - //StructTy = StructType::get(M->getContext(), paramTy); - StructTy = cast( newFunction->getArg(0)->getType()); - } + // TODO: Make StructTy a field + StructType* StructTy = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + //StructTy = StructType::get(M->getContext(), paramTy); + StructTy = cast(newFunction->getArg(0)->getType()); + } - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); - // Rewrite all users of the inputs in the extracted region to use the - // arguments (or appropriate addressing into struct) instead. 
- for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); - Instruction *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, - "loadgep_" + inputs[i]->getName(), TI); - } else - RewriteVal = &*AI++; - - if (KeepOldBlocks) { - auto In = inputs[i]; - VMap[In] = RewriteVal ; - } else { - std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User* use : Users) - if (Instruction* inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } + // Create an iterator to name all of the arguments we inserted. + Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value* RewriteVal; + if (AggregateArgs) { + Value* Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); + Instruction* TI = newFunction->begin()->getTerminator(); + GetElementPtrInst* GEP = GetElementPtrInst::Create( + StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, + "loadgep_" + inputs[i]->getName(), TI); } -#if 0 - // Set names for input and output arguments. 
- if (!AggregateArgs) { - AI = newFunction->arg_begin(); - for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) - AI->setName(inputs[i]->getName()); - for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) - AI->setName(outputs[i]->getName()+".out"); + else + RewriteVal = &*AI++; + + if (KeepOldBlocks) { + auto In = inputs[i]; + VMap[In] = RewriteVal; } -#endif - header->getParent()->viewCFG(); - if (!KeepOldBlocks) { - // Rewrite branches to basic blocks outside of the loop to new dummy blocks - // within the new function. This must be done before we lose track of which - // blocks were originally in the code region. - std::vector Users(header->user_begin(), header->user_end()); - for (auto& U : Users) // FIXME: KeepOldBlocks? - // The BasicBlock which contains the branch is not in the region - // modify the branch target to a new block - if (Instruction* I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction && - !Blocks.count(I->getParent())) - I->replaceUsesOfWith(header, newHeader); + else { + std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); + for (User* use : Users) + if (Instruction* inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); } + } +#if 0 + // Set names for input and output arguments. + if (!AggregateArgs) { + AI = newFunction->arg_begin(); + for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) + AI->setName(inputs[i]->getName()); + for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) + AI->setName(outputs[i]->getName() + ".out"); + } +#endif + header->getParent()->viewCFG(); + if (!KeepOldBlocks) { + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector Users(header->user_begin(), header->user_end()); + for (auto& U : Users) // FIXME: KeepOldBlocks? 
+ // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (Instruction* I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction && + !Blocks.count(I->getParent())) + I->replaceUsesOfWith(header, newHeader); + } - //return newFunction; + //return newFunction; - BasicBlock *AllocaBlock = BasicBlock::Create(header->getContext(), "entry", newFunction, newRootNode); - auto BranchI = BranchInst::Create(newRootNode, AllocaBlock); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); + BasicBlock* AllocaBlock = BasicBlock::Create(header->getContext(), "entry", newFunction, newRootNode); + auto BranchI = BranchInst::Create(newRootNode, AllocaBlock); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); - // Recursive calls to oldFunction still call the old Function from extracted function. + // Recursive calls to oldFunction still call the old Function from extracted function. - VMap[oldFunction] = oldFunction; + VMap[oldFunction] = oldFunction; #if 0 - CallInst *TheCall = emitCallAndSwitchStatement(newFunction, newRootNode, inputs, outputs,true, VMap); - /* - CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, - BasicBlock *codeReplacer, - ValueSet &inputs, - ValueSet &outputs, bool KeepOldBlocks, ValueToValueMapTy &VMap) { - */ + CallInst* TheCall = emitCallAndSwitchStatement(newFunction, newRootNode, inputs, outputs, true, VMap); + /* + CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, + BasicBlock *codeReplacer, + ValueSet &inputs, + ValueSet &outputs, bool KeepOldBlocks, ValueToValueMapTy &VMap) { + */ #else - // Emit a call to the new function, passing in: *pointer to struct (if - // aggregating parameters), or plan inputs and allocated memory for outputs - std::vector params, StructValues, ReloadOutputs, Reloads; + // Emit a call to the new function, passing in: *pointer to struct (if + // aggregating parameters), or 
plan inputs and allocated memory for outputs + std::vector ReloadOutputs, Reloads; - // Module *M = newFunction->getParent(); - LLVMContext &Context = M->getContext(); - const DataLayout &DL = M->getDataLayout(); - CallInst *call = nullptr; + // Module *M = newFunction->getParent(); + LLVMContext& Context = M->getContext(); + const DataLayout& DL = M->getDataLayout(); + CallInst* call = nullptr; #if 0 - BasicBlock *AllocaBlock ; - if (KeepOldBlocks) { - AllocaBlock = &newFunction->front(); - } else { - AllocaBlock = &codeReplacer->getParent()->front(); - } + BasicBlock* AllocaBlock; + if (KeepOldBlocks) { + AllocaBlock = &newFunction->front(); + } + else { + AllocaBlock = &codeReplacer->getParent()->front(); + } #endif #if 0 - auto NewAlloca = [&](Type *Ty, unsigned AddrSpace, Value *ArraySize, - const Twine &Name) { - if (!KeepOldBlocks) - return new AllocaInst(Ty, AddrSpace,ArraySize, Name, &codeReplacer->getParent()->front().front()); - return new AllocaInst(Ty, AddrSpace, ArraySize, Name, &newFunction->front().front()); - }; + auto NewAlloca = [&](Type* Ty, unsigned AddrSpace, Value* ArraySize, + const Twine& Name) { + if (!KeepOldBlocks) + return new AllocaInst(Ty, AddrSpace, ArraySize, Name, &codeReplacer->getParent()->front().front()); + return new AllocaInst(Ty, AddrSpace, ArraySize, Name, &newFunction->front().front()); + }; #endif - // Add inputs as params, or to be filled into the struct - unsigned ArgNo = 0; - SmallVector SwiftErrorArgs; - for (Value *input : inputs) { - if (AggregateArgs) - StructValues.push_back(input); - else { - params.push_back(input); - if (input->isSwiftError()) - SwiftErrorArgs.push_back(ArgNo); - } - ++ArgNo; - } + - // Create allocas for the outputs - for (Value *output : outputs) { - if (AggregateArgs) { - StructValues.push_back(output); - } else { - AllocaInst *alloca = - // NewAlloca(output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc"); + // Create allocas for the outputs + for (Value* output : 
outputs) { + if (AggregateArgs) { + StructValues.push_back(output); + } + else { + AllocaInst* alloca = + // NewAlloca(output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc"); #if 1 - new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), - nullptr, output->getName() + ".loc", - &AllocaBlock->front()); + new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), + nullptr, output->getName() + ".loc", + &codeReplacer->getParent()->front().front()); #endif - ReloadOutputs.push_back(alloca); - params.push_back(alloca); - } - } + ReloadOutputs.push_back(alloca); + params.push_back(alloca); + } + } - StructType *StructArgTy = nullptr; - AllocaInst *Struct = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - std::vector ArgTypes; - for (Value *V : StructValues) - ArgTypes.push_back(V->getType()); + StructType* StructArgTy = nullptr; + AllocaInst* Struct = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + std::vector ArgTypes; + for (Value* V : StructValues) + ArgTypes.push_back(V->getType()); - // Allocate a struct at the beginning of this function - StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); - // Struct = NewAlloca(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg"); + // Allocate a struct at the beginning of this function + StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); + // Struct = NewAlloca(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg"); #if 1 - Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", - &AllocaBlock->front()); + Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", + &codeReplacer->getParent()->front().front()); #endif - params.push_back(Struct); - - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - 
GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); - } - } + params.push_back(Struct); - // Emit the call to the function - call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value* Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst* GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + new StoreInst(StructValues[i], GEP, codeReplacer); + } + } + // Emit the call to the function + call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); - // Add debug location to the new call, if the original function has debug - // info. In that case, the terminator of the entry block of the extracted - // function contains the first debug location of the extracted function, - // set in extractCodeRegion. - if (codeReplacer->getParent()->getSubprogram()) { - if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) - call->setDebugLoc(DL); - } - codeReplacer->getInstList().push_back(call); - // Set swifterror parameter attributes. - for (unsigned SwiftErrArgNo : SwiftErrorArgs) { - call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - } + // Add debug location to the new call, if the original function has debug + // info. In that case, the terminator of the entry block of the extracted + // function contains the first debug location of the extracted function, + // set in extractCodeRegion. 
+ if (codeReplacer->getParent()->getSubprogram()) { + if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) + call->setDebugLoc(DL); + } + codeReplacer->getInstList().push_back(call); - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); - - DenseMap ReloadReplacements; - - // Reload the outputs passed in by reference. - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value* Output = nullptr; - if (AggregateArgs) { - Value* Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst* GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } - else { - Output = ReloadOutputs[i]; - } - LoadInst* load = new LoadInst(outputs[i]->getType(), Output, - outputs[i]->getName() + ".reload", - codeReplacer); - Reloads.push_back(load); - - if (KeepOldBlocks) { - auto OrigOut = outputs[i]; - //VMap[Out] = load; - ReloadReplacements[OrigOut] = load; - } else { - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction* inst = cast(Users[u]); - if (!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); - } - } - } + // Set swifterror parameter attributes. + for (unsigned SwiftErrArgNo : SwiftErrorArgs) { + call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); + newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); + } - // Now we can emit a switch statement using the call as a value. 
- SwitchInst *TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); + DenseMap ReloadReplacements; + DenseMap ReloadAddress; + DenseMap SpillAddress; - auto newFuncIt = newFunction->front().getIterator(); - for (BasicBlock *Block : Blocks) { - BasicBlock* CBB = CloneBasicBlock(Block, VMap, {}, newFunction /*, nullptr, &DIFinder*/); - - // Add basic block mapping. - VMap[Block] = CBB; - - // It is only legal to clone a function if a block address within that - // function is never referenced outside of the function. Given that, we - // want to map block addresses from the old function to block addresses in - // the clone. (This is different from the generic ValueMapper - // implementation, which generates an invalid blockaddress when - // cloning a function.) - if (Block->hasAddressTaken()) { - Constant *OldBBAddr = BlockAddress::get(oldFunction,Block); - VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); - } + // Reload the outputs passed in by reference. + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value* Output = nullptr; + if (AggregateArgs) { + Value* Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst* GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } + else { + Output = ReloadOutputs[i]; + } + ReloadAddress[outputs[i]] = Output; - // Note return instructions for the caller. 
- // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) - // Returns.push_back(RI); + // new StoreInst(outputs[i]->getType(), Output, ); + SpillAddress[outputs[i]] = new AllocaInst (outputs[i]->getType(), 0, outputs[i]->getName() + ".addr",&codeReplacer->getParent()->front().front()); - for (auto&& P : CBB->phis()) { - auto NumIncoming = P.getNumIncomingValues(); - for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { - if (Blocks.count(P.getIncomingBlock(Idx))) - continue; - P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/ false); - } - } + continue; + LoadInst* load = new LoadInst(outputs[i]->getType(), Output, + outputs[i]->getName() + ".reload", + codeReplacer); + Reloads.push_back(load); + + if (KeepOldBlocks) { + auto OrigOut = outputs[i]; + //VMap[Out] = load; + ReloadReplacements[OrigOut] = load; + } else { + std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction* inst = cast(Users[u]); + if (!Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); } + } + } + + // Now we can emit a switch statement using the call as a value. + SwitchInst* TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + + auto newFuncIt = newFunction->front().getIterator(); + for (BasicBlock* Block : Blocks) { + BasicBlock* CBB = CloneBasicBlock(Block, VMap, {}, newFunction /*, nullptr, &DIFinder*/); + // Add basic block mapping. + VMap[Block] = CBB; + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) 
+ if (Block->hasAddressTaken()) { + Constant* OldBBAddr = BlockAddress::get(oldFunction, Block); + VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); + } + + // Note return instructions for the caller. + // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) + // Returns.push_back(RI); - // Since there may be multiple exits from the original region, make the new - // function return an unsigned, switch on that number. This loop iterates - // over all of the blocks in the extracted region, updating any terminator - // instructions in the to-be-extracted region that branch to blocks that are - // not in the region to be extracted. - std::map ExitBlockMap; - // Iterate over the previously collected targets, and create new blocks inside - // the function to branch to. - unsigned switchVal = 0; - for (BasicBlock *OldTarget : OldTargets) { - if (Blocks.count(OldTarget)) + for (auto&& P : CBB->phis()) { + auto NumIncoming = P.getNumIncomingValues(); + for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { + if (Blocks.count(P.getIncomingBlock(Idx))) continue; - BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) continue; - - // If we don't already have an exit stub for this non-extracted - // destination, create one now! - NewTarget = BasicBlock::Create(Context, - OldTarget->getName() + ".exitStub", - newFunction); - VMap[OldTarget] = NewTarget; - unsigned SuccNum = switchVal++; - - Value* brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: break; // No value needed. - case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/ false); + } + } + } + + + + + // Since there may be multiple exits from the original region, make the new + // function return an unsigned, switch on that number. 
This loop iterates + // over all of the blocks in the extracted region, updating any terminator + // instructions in the to-be-extracted region that branch to blocks that are + // not in the region to be extracted. + std::map ExitBlockMap; + + // Iterate over the previously collected targets, and create new blocks inside + // the function to branch to. + unsigned switchVal = 0; + for (BasicBlock* OldTarget : OldTargets) { + if (Blocks.count(OldTarget)) + continue; + BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; + if (NewTarget) continue; + + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", + newFunction); + VMap[OldTarget] = NewTarget; + unsigned SuccNum = switchVal++; + + Value* brVal = nullptr; + assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); + switch (NumExitBlocks) { + case 0: + case 1: break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } + + ReturnInst::Create(Context, brVal, NewTarget); + + // auto OldPredecessor = OldTarget->getUniquePredecessor(); + + + // Update the switch instruction. 
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), + OldTarget); + + + +#if 0 + if (KeepOldBlocks) { + // for (auto T : OldTargets) { + DenseMap OutRepl; + for (auto&& P : OldTarget->phis()) { + int NumIncoming = P.getNumIncomingValues(); + for (int i = 0; i < NumIncoming; ++i) { + auto OldVal = P.getIncomingValue(i); + auto ReplVal = ReloadReplacements.lookup(OldVal); + if (ReplVal) { + P.addIncoming(ReplVal, codeReplacer); + OutRepl[OldVal] = &P; break; } + } + } - ReturnInst::Create(Context, brVal, NewTarget); - auto OldPredecessor = OldTarget->getUniquePredecessor(); + SmallPtrSet OriginalPreds; + for (auto Pred : predecessors(OldTarget)) { + if (Blocks.count(Pred)) continue; + if (Pred == codeReplacer)continue; + OriginalPreds.insert(Pred); + } + + if (OriginalPreds.size() == 1) { + auto OldPredecessor = *OriginalPreds.begin(); + for (auto&& O : outputs) { + auto& PHI = OutRepl[O]; + if (!PHI) { + auto ReplVal = ReloadReplacements.lookup(O); + PHI = PHINode::Create(O->getType(), 2, O->getName() + ".merge_new_and_old", OldTarget->getFirstNonPHI()); + PHI->addIncoming(O, OldPredecessor); + PHI->addIncoming(ReplVal, codeReplacer); + } + - // Update the switch instruction. 
- TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), - SuccNum), - OldTarget); - - if (KeepOldBlocks) { - // for (auto T : OldTargets) { - DenseMap OutRepl; - for (auto&& P : OldTarget->phis()) { - int NumIncoming = P.getNumIncomingValues(); - for (int i = 0; i < NumIncoming; ++i) { - auto OldVal = P.getIncomingValue(i); - auto ReplVal = ReloadReplacements.lookup(OldVal); - if (ReplVal) { - P.addIncoming(ReplVal,codeReplacer); - OutRepl[OldVal] =& P; - break; - } - } +#if 0 + for (auto&& U : make_early_inc_range(O->uses())) { + auto* User = dyn_cast(U.getUser()); + if (!User) continue; + //if (!DT->dominates(OldTarget, User->getParent())) continue; + if (VMap.lookup(User)) continue; + if (Blocks.count(User->getParent())) continue; + if (User->getParent()->getParent() != oldFunction) continue; + // if (User->getParent() == OldTarget && isa(User)) continue; + if (auto P = dyn_cast(User)) { + auto Incoming = P->getIncomingBlock(U.getOperandNo()); + if (Incoming == codeReplacer || Blocks.count(Incoming)) continue; } - //} - - if (OldPredecessor) { - for (auto&& O : outputs) { - auto& PHI = OutRepl[O]; - - for (auto&& U : make_early_inc_range(O->uses())) { - auto *User = dyn_cast(U.getUser()); - if (!User) continue; - if (Blocks.count(User->getParent())) continue; - // if (User->getParent() == OldTarget && isa(User)) continue; - if (auto P = dyn_cast(User)) { - auto Incoming = P->getIncomingBlock(U.getOperandNo()); - if (Incoming == codeReplacer || Blocks.count(Incoming)) continue; - } - - if (!PHI) { - auto ReplVal = ReloadReplacements.lookup(O); - PHI = PHINode::Create(O->getType(), 2, O->getName() + ".merge_new_and_old", OldTarget->getFirstNonPHI()); - PHI->addIncoming(O, OldPredecessor); - PHI->addIncoming(ReplVal, codeReplacer); - } - - U.set(PHI); - } - } + + if (!PHI) { + auto ReplVal = ReloadReplacements.lookup(O); + PHI = PHINode::Create(O->getType(), 2, O->getName() + ".merge_new_and_old", OldTarget->getFirstNonPHI()); + PHI->addIncoming(O, 
OldPredecessor); + PHI->addIncoming(ReplVal, codeReplacer); } + + U.set(PHI); } +#endif + } } + } +#endif + } + + + for (auto&& O : outputs) { } - //if (!KeepOldBlocks) #if 1 diff --git a/llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll b/llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll index 210da7b042e14..a07b81bd3f4d2 100644 --- a/llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll +++ b/llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs ; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -ir-outlining-copy < %s | FileCheck %s --check-prefixes=KEEP ; Here we have multiple exits, but the different sources, same outputs are ; needed, this checks that they are compressed, and moved into the appropriate From 1e8b833aab6209dead82f9164a0a22ef60f69aaa Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 2 Dec 2021 16:48:46 -0600 Subject: [PATCH 014/130] WIP --- .../llvm/Transforms/Utils/CodeExtractor.h | 6 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 71 +++++++++++-------- 2 files changed, 47 insertions(+), 30 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 95217e62a7fc7..7a8ec53a2a7cc 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -270,7 +270,8 @@ class CodeExtractorAnalysisCache { BasicBlock * newRootNode , std::vector ¶ms, std::vector &StructValues, - SmallVectorImpl &SwiftErrorArgs + SmallVectorImpl &SwiftErrorArgs, + std::vector & ReloadOutputs,std::vector & Reloads ); void moveCodeToFunction(Function *newFunction); @@ -286,7 +287,8 @@ class CodeExtractorAnalysisCache { ValueToValueMapTy &VMap, std::vector ¶ms, std::vector &StructValues, - 
SmallVectorImpl &SwiftErrorArgs + SmallVectorImpl &SwiftErrorArgs, + std::vector & ReloadOutputs,std::vector & Reloads ); }; diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index eaed2905a40d9..d7afb09e70ba3 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1109,11 +1109,12 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, ValueSet &outputs, bool KeepOldBlocks, ValueToValueMapTy &VMap, std::vector ¶ms, std::vector &StructValues, - SmallVectorImpl &SwiftErrorArgs + SmallVectorImpl &SwiftErrorArgs, + std::vector & ReloadOutputs,std::vector & Reloads ) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs - std::vector ReloadOutputs, Reloads; + //std::vector ReloadOutputs, Reloads; Module *M = newFunction->getParent(); LLVMContext &Context = M->getContext(); @@ -1127,23 +1128,6 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, - // Create allocas for the outputs - for (Value *output : outputs) { - if (AggregateArgs) { - StructValues.push_back(output); - } else { - AllocaInst *alloca = - // NewAlloca(output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc"); -#if 1 - new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), - nullptr, output->getName() + ".loc", - &AllocaBlock->front()); -#endif - ReloadOutputs.push_back(alloca); - params.push_back(alloca); - } - } - StructType *StructArgTy = nullptr; AllocaInst *Struct = nullptr; @@ -1821,6 +1805,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, AI->setName(outputs[i]->getName()+".out"); } + std::vector ReloadOutputs; + std::vector Reloads; + // Add inputs as params, or to be filled into the struct unsigned ArgNo = 0; @@ -1838,6 +1825,28 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache 
&CEAC, ++ArgNo; } + // TOOD: Pass AllocaBlock + BasicBlock * AllocaBlock = &codeReplacer->getParent()->front(); + Module* M = oldFunction->getParent(); + const DataLayout& DL = M->getDataLayout(); + + // Create allocas for the outputs + for (Value *output : outputs) { + if (AggregateArgs) { + StructValues.push_back(output); + } else { + AllocaInst *alloca = + // NewAlloca(output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc"); +#if 1 + new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), + nullptr, output->getName() + ".loc", + &AllocaBlock->front()); +#endif + ReloadOutputs.push_back(alloca); + params.push_back(alloca); + } + } + // Update the entry count of the function. if (BFI) { @@ -1866,7 +1875,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq, ExitWeights, ExitBlocks, SinkingCands, HoistingCands, CommonExit, oldFunction, newFunction,header, codeReplacer, nullptr, newRootNode, params, StructValues, - SwiftErrorArgs + SwiftErrorArgs,ReloadOutputs, + Reloads ); } else { // Transforms/HotColdSplit/stale-assume-in-original-func.ll @@ -1916,6 +1926,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, cast(II)->moveBefore(TI); } + // TODO: ByCopy // Collect objects which are inputs to the extraction region and also // referenced by lifetime start markers within it. 
The effects of these @@ -1947,7 +1958,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, CallInst* TheCall = emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs, false, VMap, params, StructValues, - SwiftErrorArgs + SwiftErrorArgs,ReloadOutputs,Reloads ); @@ -2030,14 +2041,13 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE BasicBlock* NewEntry, BasicBlock* newRootNode, std::vector ¶ms, std::vector &StructValues, - SmallVectorImpl &SwiftErrorArgs + SmallVectorImpl &SwiftErrorArgs, + std::vector & ReloadOutputs,std::vector & Reloads ) { // Assumption: this is a single-entry code region, and the header is the first block in the region. // BasicBlock *header = *Blocks.begin(); - - - + @@ -2102,7 +2112,9 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE AI->setName(outputs[i]->getName() + ".out"); } #endif - header->getParent()->viewCFG(); + + if (false) header->getParent()->viewCFG(); + if (!KeepOldBlocks) { // Rewrite branches to basic blocks outside of the loop to new dummy blocks // within the new function. 
This must be done before we lose track of which @@ -2139,7 +2151,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE #else // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs - std::vector ReloadOutputs, Reloads; + // std::vector ReloadOutputs, Reloads; // Module *M = newFunction->getParent(); LLVMContext& Context = M->getContext(); @@ -2166,7 +2178,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE #endif - +#if 0 // Create allocas for the outputs for (Value* output : outputs) { if (AggregateArgs) { @@ -2184,6 +2196,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE params.push_back(alloca); } } +#endif StructType* StructArgTy = nullptr; AllocaInst* Struct = nullptr; @@ -2243,6 +2256,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE DenseMap ReloadAddress; DenseMap SpillAddress; +#if 0 // Reload the outputs passed in by reference. for (unsigned i = 0, e = outputs.size(); i != e; ++i) { Value* Output = nullptr; @@ -2283,6 +2297,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE } } } +#endif // Now we can emit a switch statement using the call as a value. 
SwitchInst* TheSwitch = From c0200bdc3684d7d0ed570da8d6a4de11ff505119 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 2 Dec 2021 17:39:50 -0600 Subject: [PATCH 015/130] WIP --- .../llvm/Transforms/Utils/CodeExtractor.h | 9 ++- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 80 +++++++++++-------- 2 files changed, 52 insertions(+), 37 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 7a8ec53a2a7cc..361a76c02f1e0 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -37,6 +37,7 @@ class Loop; class Module; class Type; class Value; +class StructType; /// A cache for the CodeExtractor analysis. The operation \ref /// CodeExtractor::extractCodeRegion is guaranteed not to invalidate this @@ -271,7 +272,9 @@ class CodeExtractorAnalysisCache { std::vector ¶ms, std::vector &StructValues, SmallVectorImpl &SwiftErrorArgs, - std::vector & ReloadOutputs,std::vector & Reloads + std::vector & ReloadOutputs,std::vector & Reloads, + StructType *StructArgTy , + AllocaInst *Struct ); void moveCodeToFunction(Function *newFunction); @@ -288,7 +291,9 @@ class CodeExtractorAnalysisCache { std::vector ¶ms, std::vector &StructValues, SmallVectorImpl &SwiftErrorArgs, - std::vector & ReloadOutputs,std::vector & Reloads + std::vector & ReloadOutputs,std::vector & Reloads, + StructType *StructArgTy , + AllocaInst *Struct ); }; diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index d7afb09e70ba3..14be3129306ba 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1110,7 +1110,9 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, std::vector ¶ms, std::vector &StructValues, SmallVectorImpl &SwiftErrorArgs, - std::vector & ReloadOutputs,std::vector & Reloads + std::vector & ReloadOutputs,std::vector & 
Reloads, + StructType *StructArgTy , + AllocaInst *Struct ) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs @@ -1128,35 +1130,6 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, - - StructType *StructArgTy = nullptr; - AllocaInst *Struct = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - std::vector ArgTypes; - for (Value *V : StructValues) - ArgTypes.push_back(V->getType()); - - // Allocate a struct at the beginning of this function - StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); - // Struct = NewAlloca(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg"); -#if 1 - Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", - &AllocaBlock->front()); -#endif - params.push_back(Struct); - - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); - } - } - - // Emit the call to the function call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? 
"targetBlock" : ""); @@ -1848,6 +1821,37 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } + LLVMContext &Context = M->getContext(); + + + StructType *StructArgTy = nullptr; + AllocaInst *Struct = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + std::vector ArgTypes; + for (Value *V : StructValues) + ArgTypes.push_back(V->getType()); + + // Allocate a struct at the beginning of this function + StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); + // Struct = NewAlloca(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg"); +#if 1 + Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", + &AllocaBlock->front()); +#endif + params.push_back(Struct); + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst *GEP = GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + new StoreInst(StructValues[i], GEP, codeReplacer); + } + } + + // Update the entry count of the function. 
if (BFI) { auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); @@ -1874,9 +1878,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq, ExitWeights, ExitBlocks, SinkingCands, HoistingCands, CommonExit, oldFunction, newFunction,header, codeReplacer, nullptr, newRootNode, params, - StructValues, - SwiftErrorArgs,ReloadOutputs, - Reloads + StructValues, + SwiftErrorArgs,ReloadOutputs, + Reloads, + StructArgTy, Struct ); } else { // Transforms/HotColdSplit/stale-assume-in-original-func.ll @@ -1958,7 +1963,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, CallInst* TheCall = emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs, false, VMap, params, StructValues, - SwiftErrorArgs,ReloadOutputs,Reloads + SwiftErrorArgs,ReloadOutputs,Reloads, + StructArgTy, Struct ); @@ -2042,7 +2048,9 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE std::vector ¶ms, std::vector &StructValues, SmallVectorImpl &SwiftErrorArgs, - std::vector & ReloadOutputs,std::vector & Reloads + std::vector & ReloadOutputs,std::vector & Reloads, + StructType *StructArgTy , + AllocaInst *Struct ) { // Assumption: this is a single-entry code region, and the header is the first block in the region. // BasicBlock *header = *Blocks.begin(); @@ -2198,6 +2206,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE } #endif +#if 0 StructType* StructArgTy = nullptr; AllocaInst* Struct = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { @@ -2225,6 +2234,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE new StoreInst(StructValues[i], GEP, codeReplacer); } } +#endif // Emit the call to the function call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? 
"targetBlock" : ""); From 30dbbf0ed8162541c581d2ac3ed5dcc1e029d135 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 2 Dec 2021 17:46:00 -0600 Subject: [PATCH 016/130] WIP --- .../llvm/Transforms/Utils/CodeExtractor.h | 12 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 487 +++++++++--------- 2 files changed, 235 insertions(+), 264 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 361a76c02f1e0..33f8320dee9e6 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -284,17 +284,7 @@ class CodeExtractorAnalysisCache { DenseMap &ExitWeights, BranchProbabilityInfo *BPI); - CallInst *emitCallAndSwitchStatement(Function *newFunction, - BasicBlock *newHeader, - ValueSet &inputs, ValueSet &outputs,bool KeepOldBlocks , - ValueToValueMapTy &VMap, - std::vector ¶ms, - std::vector &StructValues, - SmallVectorImpl &SwiftErrorArgs, - std::vector & ReloadOutputs,std::vector & Reloads, - StructType *StructArgTy , - AllocaInst *Struct - ); + }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 14be3129306ba..12d44b9d8cc9a 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1100,258 +1100,6 @@ static void insertLifetimeMarkersSurroundingCall( -/// emitCallAndSwitchStatement - This method sets up the caller side by adding -/// the call instruction, splitting any PHI nodes in the header block as -/// necessary. 
-CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, - BasicBlock *codeReplacer, - ValueSet &inputs, - ValueSet &outputs, bool KeepOldBlocks, ValueToValueMapTy &VMap, - std::vector ¶ms, - std::vector &StructValues, - SmallVectorImpl &SwiftErrorArgs, - std::vector & ReloadOutputs,std::vector & Reloads, - StructType *StructArgTy , - AllocaInst *Struct - ) { - // Emit a call to the new function, passing in: *pointer to struct (if - // aggregating parameters), or plan inputs and allocated memory for outputs - //std::vector ReloadOutputs, Reloads; - - Module *M = newFunction->getParent(); - LLVMContext &Context = M->getContext(); - const DataLayout &DL = M->getDataLayout(); - CallInst *call = nullptr; - - - // TOOD: Pass AllocaBlock - BasicBlock * AllocaBlock = &codeReplacer->getParent()->front(); - - - - - // Emit the call to the function - call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); - - - - // Add debug location to the new call, if the original function has debug - // info. In that case, the terminator of the entry block of the extracted - // function contains the first debug location of the extracted function, - // set in extractCodeRegion. - if (codeReplacer->getParent()->getSubprogram()) { - if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) - call->setDebugLoc(DL); - } - codeReplacer->getInstList().push_back(call); - - // Set swifterror parameter attributes. - for (unsigned SwiftErrArgNo : SwiftErrorArgs) { // TOOD: Move to constructFunction - call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - } - - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); - - // Reload the outputs passed in by reference. 
- for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = nullptr; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } else { - Output = ReloadOutputs[i]; - } - LoadInst *load = new LoadInst(outputs[i]->getType(), Output, - outputs[i]->getName() + ".reload", - codeReplacer); - Reloads.push_back(load); - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast(Users[u]); - if (!KeepOldBlocks) { - if (!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); - } - } - } - - // Now we can emit a switch statement using the call as a value. - SwitchInst *TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); - - // Since there may be multiple exits from the original region, make the new - // function return an unsigned, switch on that number. This loop iterates - // over all of the blocks in the extracted region, updating any terminator - // instructions in the to-be-extracted region that branch to blocks that are - // not in the region to be extracted. - std::map ExitBlockMap; - - // Iterate over the previously collected targets, and create new blocks inside - // the function to branch to. - unsigned switchVal = 0; - for (BasicBlock *OldTarget : OldTargets) { - if (Blocks.count(OldTarget)) - continue; - BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) - continue; - - // If we don't already have an exit stub for this non-extracted - // destination, create one now! 
- NewTarget = BasicBlock::Create(Context, - OldTarget->getName() + ".exitStub", - newFunction); - VMap[OldTarget] = NewTarget; - unsigned SuccNum = switchVal++; - - Value *brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: break; // No value needed. - case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); - break; - } - - ReturnInst::Create(Context, brVal, NewTarget); - - // Update the switch instruction. - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), - SuccNum), - OldTarget); - } - - - - for (BasicBlock* Block : Blocks) { - Instruction* TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock* OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock* NewTarget = ExitBlockMap[OldTarget]; - assert(NewTarget && "Unknown target block!"); - - if (!KeepOldBlocks) { - // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); - } else { - VMap[OldTarget] = NewTarget; - } - } - } - - - - // Store the arguments right after the definition of output value. - // This should be proceeded after creating exit stubs to be ensure that invoke - // result restore will be placed in the outlined function. - Function::arg_iterator OAI = OutputArgBegin; - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto *OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; - - // Find proper insertion point. - BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. Otherwise we insert the store right after OutI. 
- if (auto *InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); - - Instruction *InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || - Blocks.count(InsertBefore->getParent())) && - "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), - InsertBefore); - new StoreInst(outputs[i], GEP, InsertBefore); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. - } else { - new StoreInst(outputs[i], &*OAI, InsertBefore); - ++OAI; - } - } - - // Now that we've done the deed, simplify the switch instruction. - Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); - switch (NumExitBlocks) { - case 0: - // There are no successors (the block containing the switch itself), which - // means that previously this was the last part of the function, and hence - // this should be rewritten as a `ret' - - // Check if the function should return a value - if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, nullptr, TheSwitch); // Return void - } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { - // return what we have - ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); - } else { - // Otherwise we must have code extracted an unwind or something, just - // return whatever we want. 
- ReturnInst::Create(Context, - Constant::getNullValue(OldFnRetTy), TheSwitch); - } - - TheSwitch->eraseFromParent(); - break; - case 1: - // Only a single destination, change the switch into an unconditional - // branch. - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); - TheSwitch->eraseFromParent(); - break; - case 2: - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), - call, TheSwitch); - TheSwitch->eraseFromParent(); - break; - default: - // Otherwise, make the default destination of the switch instruction be one - // of the other successors. - TheSwitch->setCondition(call); - TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); - // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); - break; - } - - // Insert lifetime markers around the reloads of any output values. The - // allocas output values are stored in are only in-use in the codeRepl block. - insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); - - return call; -} - void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function *oldFunc = (*Blocks.begin())->getParent(); Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); @@ -1959,14 +1707,247 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - +#if 0 CallInst* TheCall = emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs, false, VMap, params, StructValues, SwiftErrorArgs,ReloadOutputs,Reloads, StructArgTy, Struct ); +#else + Module *M = newFunction->getParent(); + LLVMContext &Context = M->getContext(); + const DataLayout &DL = M->getDataLayout(); + CallInst *call = nullptr; + + + // TOOD: Pass AllocaBlock + BasicBlock * AllocaBlock = &codeReplacer->getParent()->front(); + + + + + // Emit the call to the function + call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? 
"targetBlock" : ""); + + + + // Add debug location to the new call, if the original function has debug + // info. In that case, the terminator of the entry block of the extracted + // function contains the first debug location of the extracted function, + // set in extractCodeRegion. + if (codeReplacer->getParent()->getSubprogram()) { + if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) + call->setDebugLoc(DL); + } + codeReplacer->getInstList().push_back(call); + + // Set swifterror parameter attributes. + for (unsigned SwiftErrArgNo : SwiftErrorArgs) { // TOOD: Move to constructFunction + call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); + newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); + } + + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); + + // Reload the outputs passed in by reference. + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *Output = nullptr; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } else { + Output = ReloadOutputs[i]; + } + LoadInst *load = new LoadInst(outputs[i]->getType(), Output, + outputs[i]->getName() + ".reload", + codeReplacer); + Reloads.push_back(load); + std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction *inst = cast(Users[u]); + if (!KeepOldBlocks) { + if (!Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); + } + } + } + + // Now we can emit a switch statement using the call as a value. 
+ SwitchInst *TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + + // Since there may be multiple exits from the original region, make the new + // function return an unsigned, switch on that number. This loop iterates + // over all of the blocks in the extracted region, updating any terminator + // instructions in the to-be-extracted region that branch to blocks that are + // not in the region to be extracted. + std::map ExitBlockMap; + + // Iterate over the previously collected targets, and create new blocks inside + // the function to branch to. + unsigned switchVal = 0; + for (BasicBlock *OldTarget : OldTargets) { + if (Blocks.count(OldTarget)) + continue; + BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; + if (NewTarget) + continue; + + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", + newFunction); + VMap[OldTarget] = NewTarget; + unsigned SuccNum = switchVal++; + + Value *brVal = nullptr; + assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); + switch (NumExitBlocks) { + case 0: + case 1: break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } + + ReturnInst::Create(Context, brVal, NewTarget); + + // Update the switch instruction. 
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), + OldTarget); + } + + + + for (BasicBlock* Block : Blocks) { + Instruction* TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; + BasicBlock* OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock* NewTarget = ExitBlockMap[OldTarget]; + assert(NewTarget && "Unknown target block!"); + + if (!KeepOldBlocks) { + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } else { + VMap[OldTarget] = NewTarget; + } + } + } + + + + // Store the arguments right after the definition of output value. + // This should be proceeded after creating exit stubs to be ensure that invoke + // result restore will be placed in the outlined function. + Function::arg_iterator OAI = OutputArgBegin; + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto *OutI = dyn_cast(outputs[i]); + if (!OutI) + continue; + + // Find proper insertion point. + BasicBlock::iterator InsertPt; + // In case OutI is an invoke, we insert the store at the beginning in the + // 'normal destination' BB. Otherwise we insert the store right after OutI. 
+ if (auto *InvokeI = dyn_cast(OutI)) + InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); + else if (auto *Phi = dyn_cast(OutI)) + InsertPt = Phi->getParent()->getFirstInsertionPt(); + else + InsertPt = std::next(OutI->getIterator()); + + Instruction *InsertBefore = &*InsertPt; + assert((InsertBefore->getFunction() == newFunction || + Blocks.count(InsertBefore->getParent())) && + "InsertPt should be in new function"); + assert(OAI != newFunction->arg_end() && + "Number of output arguments should match " + "the amount of defined values"); + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), + InsertBefore); + new StoreInst(outputs[i], GEP, InsertBefore); + // Since there should be only one struct argument aggregating + // all the output values, we shouldn't increment OAI, which always + // points to the struct argument, in this case. + } else { + new StoreInst(outputs[i], &*OAI, InsertBefore); + ++OAI; + } + } + // Now that we've done the deed, simplify the switch instruction. + Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy->isVoidTy()) { + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + } else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. 
+ ReturnInst::Create(Context, + Constant::getNullValue(OldFnRetTy), TheSwitch); + } + + TheSwitch->eraseFromParent(); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->eraseFromParent(); + break; + case 2: + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->eraseFromParent(); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. + TheSwitch->setCondition(call); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); + break; + } + + // Insert lifetime markers around the reloads of any output values. The + // allocas output values are stored in are only in-use in the codeRepl block. + insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); + + CallInst* TheCall =call; +#endif moveCodeToFunction(newFunction); From a4a59d716c92ae5ce5f04623156cb77ee42f26c8 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 04:02:41 -0600 Subject: [PATCH 017/130] WIP --- .../llvm/Transforms/Utils/CodeExtractor.h | 9 +- llvm/lib/Transforms/IPO/IROutliner.cpp | 21 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 198 ++++++++++++++---- .../Transforms/IROutliner/outlining-calls.ll | 3 - .../IROutliner/outlining-multiple-exits.ll | 1 - ...-nonorphan.ll => extract-block-cleanup.ll} | 23 +- .../extract-block-multiple-exits.ll | 103 +++++++++ 7 files changed, 278 insertions(+), 80 deletions(-) rename llvm/test/tools/llvm-extract/{extract-block-nonorphan.ll => extract-block-cleanup.ll} (76%) create mode 100644 llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 
33f8320dee9e6..bd2015fafeacc 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -38,6 +38,7 @@ class Module; class Type; class Value; class StructType; +class LoadInst; /// A cache for the CodeExtractor analysis. The operation \ref /// CodeExtractor::extractCodeRegion is guaranteed not to invalidate this @@ -105,6 +106,7 @@ class CodeExtractorAnalysisCache { // Mapping from the original exit blocks, to the new blocks inside // the function. SmallVector OldTargets; + // SmallVector > OldExitingEdges; // Suffix to use when creating extracted function (appended to the original // function name + "."). If empty, the default is to use the entry block @@ -257,7 +259,7 @@ class CodeExtractorAnalysisCache { void analyzeBeforeExtraction(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs, BlockFrequency &EntryFreq,DenseMap &ExitWeights, SmallPtrSet &ExitBlocks); - void prepareForExtraction(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs); + void prepareForExtraction(bool KeepOldBlocks); void extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, @@ -274,7 +276,8 @@ class CodeExtractorAnalysisCache { SmallVectorImpl &SwiftErrorArgs, std::vector & ReloadOutputs,std::vector & Reloads, StructType *StructArgTy , - AllocaInst *Struct + AllocaInst *Struct , + function_ref MakeReloadAddress ); void moveCodeToFunction(Function *newFunction); @@ -283,8 +286,6 @@ class CodeExtractorAnalysisCache { BasicBlock *CodeReplacer, DenseMap &ExitWeights, BranchProbabilityInfo *BPI); - - }; } // end namespace llvm diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index e2796ebdcbb94..b8a314c54f18c 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -55,13 +55,6 @@ static cl::opt NoCostModel( cl::desc("Debug option to outline greedily, without restriction that " "calculated 
benefit outweighs cost")); - -static -cl::opt -KeepOldBlocks("ir-outlining-copy", cl::init(false), - cl::Hidden, - cl::desc("Copy instead of moving instructions from original function.")); - /// The OutlinableGroup holds all the overarching information for outlining /// a set of regions that are structurally similar to one another, such as the /// types of the overall function, the output blocks, the sets of stores needed @@ -240,7 +233,7 @@ void OutlinableRegion::splitCandidate() { FollowBB = nullptr; } -void OutlinableRegion::reattachCandidate() { +void OutlinableRegion::reattachCandidate() { assert(CandidateSplit && "Candidate is not split!"); // The basic block gets reattached like so: @@ -273,20 +266,12 @@ void OutlinableRegion::reattachCandidate() { BasicBlock *PlacementBB = PrevBB; if (StartBB != EndBB) PlacementBB = EndBB; - if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr && FollowBB->getSinglePredecessor()) { + if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) { assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!"); assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!"); - //for (auto Pred : predecessors(FollowBB)) { - // if (Pred == PlacementBB) continue; - // Pred->replaceSuccessorsPhiUsesWith(FollowBB,nullptr); - //} PlacementBB->getTerminator()->eraseFromParent(); moveBBContents(*FollowBB, *PlacementBB); PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); - //FollowBB->replaceAllUsesWith(UndefValue::get(FollowBB->getType())); -// for (auto &&U : make_early_inc_range( FollowBB->uses())) { -// U.set(UndefValue::get(FollowBB->getType())); -// } FollowBB->eraseFromParent(); } @@ -1918,7 +1903,7 @@ bool IROutliner::extractSection(OutlinableRegion &Region) { Function *OrigF = Region.StartBB->getParent(); CodeExtractorAnalysisCache CEAC(*OrigF); Region.ExtractedFunction = - Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs, KeepOldBlocks); + 
Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs); // If the extraction was successful, find the BasicBlock, and reassign the // OutlinableRegion blocks diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 12d44b9d8cc9a..c758567b6112e 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -52,6 +52,7 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" @@ -63,6 +64,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include #include #include @@ -1340,7 +1342,7 @@ void CodeExtractor::analyzeBeforeExtraction( -void CodeExtractor::prepareForExtraction(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs) { +void CodeExtractor::prepareForExtraction(bool KeepOldBlocks) { // BasicBlock *header = *Blocks.begin(); // Function *oldFunction = header->getParent(); @@ -1349,6 +1351,29 @@ void CodeExtractor::prepareForExtraction(const CodeExtractorAnalysisCache &CEAC, splitReturnBlocks(); + if (KeepOldBlocks) { + //SmallPtrSet ExitBlocks; + for (BasicBlock *Block : Blocks) { + SmallVector Succs; + llvm::append_range(Succs, successors(Block) ); + + for (BasicBlock *&Succ : Succs) { + if (Blocks.count(Succ)) continue; + + if (!Succ->getSinglePredecessor()) { + Succ= SplitEdge(Block, Succ, DT); + } + + // Ensure no PHI node in exit block (still possible with single predecessor, e.g. 
LCSSA) + while (auto P = dyn_cast(&Succ->front())) { + assert(P->getNumIncomingValues()==1); + P->replaceAllUsesWith(P->getIncomingValue(0)); + P->eraseFromParent(); + } + } + } + } + } @@ -1391,14 +1416,13 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, for (BasicBlock *Pred : predecessors(header)) { if (Blocks.count(Pred)) continue; - EntryFreq += - BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); + EntryFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); } } // canonicalization - prepareForExtraction(CEAC,inputs, outputs); + prepareForExtraction(KeepOldBlocks); // analysis, after ret splitting @@ -1453,6 +1477,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Find inputs to, outputs from the code region. findInputsOutputs(inputs, outputs, SinkingCands); + + + #if 0 DenseMap> ExitValues; for (auto&& O : outputs) { @@ -1485,6 +1512,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); auto newHeader = codeReplacer; + IRBuilder<> CodeReplacerBuilder(codeReplacer); + + ValueToValueMapTy VMap; StructType *StructTy = nullptr; @@ -1517,6 +1547,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, NewValues.push_back(RewriteVal); } + // Set names for input and output arguments. 
if (!AggregateArgs) { AI = newFunction->arg_begin(); @@ -1526,8 +1557,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, AI->setName(outputs[i]->getName()+".out"); } - std::vector ReloadOutputs; - std::vector Reloads; + std::vector ReloadOutputs; + std::vector Reloads; // Add inputs as params, or to be filled into the struct @@ -1546,7 +1577,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ++ArgNo; } - // TOOD: Pass AllocaBlock + BasicBlock * AllocaBlock = &codeReplacer->getParent()->front(); Module* M = oldFunction->getParent(); const DataLayout& DL = M->getDataLayout(); @@ -1599,6 +1630,46 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); + + using InsertPointTy = IRBuilder<>::InsertPoint; + IRBuilder<> Builder(Context); + auto MakeReloadAddress = [&](int i) { + Value *Output = nullptr; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + Value *GEP = Builder.CreateGEP(StructArgTy, Struct, Idx, Twine("gep_reload_") + outputs[i]->getName()); + //GetElementPtrInst *GEP = GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + //codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } else { + Output = ReloadOutputs[i]; + } + return Output; + }; + + +#if 0 + // Undo SSA for output values after the extracted region before dominator analysis is invalidated. 
+ if (KeepOldBlocks) { + for (auto P : enumerate(outputs)) { + auto Idx = P.index(); + auto OutVal = P.value(); + + for (auto &&E : ExitBlocks) { + Builder.SetInsertPoint(E->getTerminator()); + auto Attr = MakeReloadAddress(Idx); + Builder.CreateStore(OutVal, ); + } + } + } +#endif + // Update the entry count of the function. if (BFI) { @@ -1629,7 +1700,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, StructValues, SwiftErrorArgs,ReloadOutputs, Reloads, - StructArgTy, Struct + StructArgTy, Struct, + MakeReloadAddress ); } else { // Transforms/HotColdSplit/stale-assume-in-original-func.ll @@ -1718,17 +1790,15 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Module *M = newFunction->getParent(); LLVMContext &Context = M->getContext(); const DataLayout &DL = M->getDataLayout(); - CallInst *call = nullptr; // TOOD: Pass AllocaBlock BasicBlock * AllocaBlock = &codeReplacer->getParent()->front(); - - + // Emit the call to the function - call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); + CallInst * call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : "",codeReplacer); @@ -1740,7 +1810,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) call->setDebugLoc(DL); } - codeReplacer->getInstList().push_back(call); + // codeReplacer->getInstList().push_back(call); + + // Set swifterror parameter attributes. 
for (unsigned SwiftErrArgNo : SwiftErrorArgs) { // TOOD: Move to constructFunction @@ -1748,27 +1820,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); } - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); + // SmallVector AfterCall; + + + + // Reload the outputs passed in by reference. + Builder.SetInsertPoint(codeReplacer); for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = nullptr; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } else { - Output = ReloadOutputs[i]; - } - LoadInst *load = new LoadInst(outputs[i]->getType(), Output, - outputs[i]->getName() + ".reload", - codeReplacer); + // LoadInst *load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload",codeReplacer); + auto Output = MakeReloadAddress(i); + LoadInst *load = Builder.CreateLoad(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload"); + Reloads.push_back(load); std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { @@ -2031,7 +2095,8 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE SmallVectorImpl &SwiftErrorArgs, std::vector & ReloadOutputs,std::vector & Reloads, StructType *StructArgTy , - AllocaInst *Struct + AllocaInst *Struct , + function_ref MakeReloadAddress ) { // Assumption: this is a single-entry code region, and the header is the first block in the region. 
// BasicBlock *header = *Blocks.begin(); @@ -2244,10 +2309,11 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE std::advance(OutputArgBegin, inputs.size()); DenseMap ReloadReplacements; + SmallVector ReloadRepls; DenseMap ReloadAddress; - DenseMap SpillAddress; + // DenseMap SpillAddress; -#if 0 +#if 1 // Reload the outputs passed in by reference. for (unsigned i = 0, e = outputs.size(); i != e; ++i) { Value* Output = nullptr; @@ -2267,18 +2333,20 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE // new StoreInst(outputs[i]->getType(), Output, ); - SpillAddress[outputs[i]] = new AllocaInst (outputs[i]->getType(), 0, outputs[i]->getName() + ".addr",&codeReplacer->getParent()->front().front()); + // SpillAddress[outputs[i]] = new AllocaInst (outputs[i]->getType(), 0, outputs[i]->getName() + ".addr",&codeReplacer->getParent()->front().front()); - continue; - LoadInst* load = new LoadInst(outputs[i]->getType(), Output, - outputs[i]->getName() + ".reload", - codeReplacer); + + LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); Reloads.push_back(load); + //ReloadReplacements[outputs[i]] if (KeepOldBlocks) { auto OrigOut = outputs[i]; //VMap[Out] = load; ReloadReplacements[OrigOut] = load; + ReloadRepls.push_back(load); + + // Remove all PHIs; will need to be recreated by SSAUpdater; } else { std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { @@ -2346,7 +2414,10 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE if (Blocks.count(OldTarget)) continue; BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) continue; + if (NewTarget) { + // llvm_unreachable("Happens if e.g. switch has multiple edges to target"); + continue; + } // If we don't already have an exit stub for this non-extracted // destination, create one now! 
@@ -2373,6 +2444,17 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE // auto OldPredecessor = OldTarget->getUniquePredecessor(); +#if 0 + if (KeepOldBlocks) { + for (auto&& P : OldTarget->phis()) { + auto Val = P.getIncomingValueForBlock(OldTarget); + Value *PHINewVal = Val; + if (auto X = ReloadReplacements.lookup(Val)) + PHINewVal = X; + P.addIncoming(PHINewVal, codeReplacer); + } + } +#endif // Update the switch instruction. TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), @@ -2380,6 +2462,8 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE OldTarget); + + #if 0 if (KeepOldBlocks) { @@ -2454,6 +2538,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE + //if (!KeepOldBlocks) #if 1 for (BasicBlock *Block : Blocks) { @@ -2565,6 +2650,39 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE + + // Must be done after remap + SSAUpdater SSA; + for (auto P : enumerate(outputs)) { + auto OutIdx = P.index(); + auto OldVal = cast( P.value()); + auto NewVal = Reloads[OutIdx]; + + SSA.Initialize(OldVal->getType(), (OldVal->getName() + ".merge_with_extracted").str()); + SSA.AddAvailableValue(codeReplacer, NewVal); + + // Could help SSAUpdater by determining in advance which output values are available in which exit blocks (from DT). + SSA.AddAvailableValue(OldVal->getParent(), OldVal); + + for (auto &&U : make_early_inc_range(OldVal->uses())) { + auto User = dyn_cast(U.getUser()); + if (!User) continue; + auto EffectiveUser = User->getParent(); + if (auto &&P = dyn_cast(User)) { + EffectiveUser= P->getIncomingBlock(U); + } + + if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) continue; + + + SSA.RewriteUseAfterInsertions(U); + } + } + + + + + // Store the arguments right after the definition of output value. 
// This should be proceeded after creating exit stubs to be ensure that invoke // result restore will be placed in the outlined function. diff --git a/llvm/test/Transforms/IROutliner/outlining-calls.ll b/llvm/test/Transforms/IROutliner/outlining-calls.ll index 355006bdac728..2372c4f674964 100644 --- a/llvm/test/Transforms/IROutliner/outlining-calls.ll +++ b/llvm/test/Transforms/IROutliner/outlining-calls.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -ir-outlining-copy < %s | FileCheck %s --check-prefixes=CHECK,KEEP ; This test checks that we do can outline calls, but only if they have the same ; function type and the same name. @@ -15,7 +14,6 @@ define void @function1() { ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) -; KEEP: entry_after_outline: ; CHECK-NEXT: ret void ; entry: @@ -39,7 +37,6 @@ define void @function2() { ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) -; KEEP: entry_after_outline: ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll b/llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll index a07b81bd3f4d2..210da7b042e14 100644 --- a/llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll +++ b/llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs ; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -ir-outlining-copy < %s | FileCheck %s 
--check-prefixes=KEEP ; Here we have multiple exits, but the different sources, same outputs are ; needed, this checks that they are compressed, and moved into the appropriate diff --git a/llvm/test/tools/llvm-extract/extract-block-nonorphan.ll b/llvm/test/tools/llvm-extract/extract-block-cleanup.ll similarity index 76% rename from llvm/test/tools/llvm-extract/extract-block-nonorphan.ll rename to llvm/test/tools/llvm-extract/extract-block-cleanup.ll index 47b6821aefa32..0d30cbbc65cea 100644 --- a/llvm/test/tools/llvm-extract/extract-block-nonorphan.ll +++ b/llvm/test/tools/llvm-extract/extract-block-cleanup.ll @@ -1,11 +1,11 @@ -; RUN: llvm-extract -S -bb "foo:region_start;extractonly;both;fallback;region_end" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s +; RUN: llvm-extract -S -bb "foo:region_start;extractonly;cleanup;fallback;region_end" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s ; CHECK-LABEL: define void @foo( ; ; CHECK: outsideonly: ; CHECK-NEXT: store i32 0, i32* %arg, align 4 -; CHECK-NEXT: br label %both +; CHECK-NEXT: br label %cleanup ; ; CHECK: codeRepl: ; CHECK-NEXT: call void @foo.region_start(i32* %arg) @@ -13,9 +13,9 @@ ; ; CHECK: extractonly: ; CHECK-NEXT: store i32 1, i32* %arg, align 4 -; CHECK-NEXT: br label %both +; CHECK-NEXT: br label %cleanup ; -; CHECK: both: +; CHECK: cleanup: ; CHECK-NEXT: %dest = phi i8 [ 0, %outsideonly ], [ 1, %extractonly ] ; CHECK-NEXT: switch @@ -28,9 +28,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: extractonly: ; CHECK-NEXT: store i32 1, i32* %arg, align 4 -; CHECK-NEXT: br label %both +; CHECK-NEXT: br label %cleanup ; CHECK-EMPTY: -; CHECK-NEXT: both: +; CHECK-NEXT: cleanup: ; CHECK-NEXT: %dest = phi i8 [ 1, %extractonly ] ; CHECK-NEXT: switch i8 %dest, label %fallback [ ; CHECK-NEXT: i8 0, label %return.exitStub @@ -54,21 +54,16 @@ entry: outsideonly: store i32 0, i32* %arg, align 4 - br label %both + br label %cleanup region_start: - ;store i32 %arg, i32* %tmp1, align 4 - ;%tmp2 = load i32, i32* 
%tmp1, align 4 - ;%tmp3 = icmp sgt i32 %tmp2, 0 - ;store i8 1, i8* %dest, align 4 - ;br i1 %tmp3, label %extractonly, label %bb7 br label %extractonly extractonly: store i32 1, i32* %arg, align 4 - br label %both + br label %cleanup -both: +cleanup: %dest = phi i8 [0, %outsideonly], [1, %extractonly] switch i8 %dest, label %fallback [ i8 0, label %return diff --git a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll new file mode 100644 index 0000000000000..21b20e1f0f713 --- /dev/null +++ b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll @@ -0,0 +1,103 @@ +; RUN: llvm-extract -S -bb "foo:region_start;exiting0;exiting1" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s + + +; CHECK-LABEL: define void @foo( +; +; CHECK: outsideonly: +; CHECK-NEXT: store i32 0, i32* %arg, align 4 +; CHECK-NEXT: br label %cleanup +; +; CHECK: codeRepl: +; CHECK-NEXT: call void @foo.region_start(i32* %arg) +; CHECK-NEXT: br label %return +; +; CHECK: extractonly: +; CHECK-NEXT: store i32 1, i32* %arg, align 4 +; CHECK-NEXT: br label %cleanup +; +; CHECK: cleanup: +; CHECK-NEXT: %dest = phi i8 [ 0, %outsideonly ], [ 1, %extractonly ] +; CHECK-NEXT: switch + + +; CHECK-LABEL: define internal void @foo.region_start(i32* %arg) { +; CHECK: br label %region_start +; +; CHECK: region_start: +; CHECK-NEXT: br label %extractonly +; CHECK-EMPTY: +; CHECK-NEXT: extractonly: +; CHECK-NEXT: store i32 1, i32* %arg, align 4 +; CHECK-NEXT: br label %cleanup +; CHECK-EMPTY: +; CHECK-NEXT: cleanup: +; CHECK-NEXT: %dest = phi i8 [ 1, %extractonly ] +; CHECK-NEXT: switch i8 %dest, label %fallback [ +; CHECK-NEXT: i8 0, label %return.exitStub +; CHECK-NEXT: i8 1, label %region_end +; CHECK-NEXT: ] +; CHECK-EMPTY: +; CHECK-NEXT: fallback: +; CHECK-NEXT: unreachable +; CHECK-EMPTY: +; CHECK-NEXT: region_end: +; CHECK-NEXT: br label %return.exitStub +; CHECK-EMPTY: +; CHECK-NEXT: return.exitStub: +; CHECK-NEXT: ret void 
+; CHECK-NEXT: } + + +define void @foo(i32* %arg, i1 %c0, i1 %c1, i1 %c2, i8 %dest) { +entry: + br i1 %c0, label %region_start, label %exit + +region_start: + %a = add i32 42, 1 + br i1 %c1, label %exiting0, label %exiting1 + +exiting0: + %b = add i32 42, 2 + br i1 %c2, label %exit, label %exit0 + +exiting1: + %c = add i32 42, 3 + switch i8 %dest, label %fallback [ + i8 0, label %exit0 + i8 1, label %exit1 + i8 2, label %exit2 + i8 3, label %exit0 + ] + +fallback: + unreachable + +exit: + %A = phi i32 [ 42, %entry ], [ %b, %exiting0 ] + store i32 %A, i32* %arg + br label %return + +exit0: + %B = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ] , [ %a, %exiting1 ] ; Not working without --bb-keep-blocks (different incoming value after %exiting0 and %exiting1 is replace by codeReplacer) + store i32 %a, i32* %arg + store i32 %B, i32* %arg + br label %after + +exit1: + br label %after + +exit2: + %C = phi i32 [ %c, %exiting1 ] + store i32 %c, i32* %arg + store i32 %C, i32* %arg + br label %return + +after: + %D = phi i32 [ %b, %exit0 ], [ %c, %exit1 ] + store i32 %a, i32* %arg + store i32 %D, i32* %arg + br label %return + +return: + ret void +} From c2f7fb972dd7b2151706ce51e8fb20bc10a8ee78 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 05:00:38 -0600 Subject: [PATCH 018/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 77 +++++++++++-------- .../extract-blocks-with-groups.ll | 21 ++--- 2 files changed, 58 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index c758567b6112e..8095f453746c6 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1351,28 +1351,6 @@ void CodeExtractor::prepareForExtraction(bool KeepOldBlocks) { splitReturnBlocks(); - if (KeepOldBlocks) { - //SmallPtrSet ExitBlocks; - for (BasicBlock *Block : Blocks) { - SmallVector Succs; - llvm::append_range(Succs, successors(Block) ); - - for 
(BasicBlock *&Succ : Succs) { - if (Blocks.count(Succ)) continue; - - if (!Succ->getSinglePredecessor()) { - Succ= SplitEdge(Block, Succ, DT); - } - - // Ensure no PHI node in exit block (still possible with single predecessor, e.g. LCSSA) - while (auto P = dyn_cast(&Succ->front())) { - assert(P->getNumIncomingValues()==1); - P->replaceAllUsesWith(P->getIncomingValue(0)); - P->eraseFromParent(); - } - } - } - } } @@ -1442,8 +1420,53 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + + // canonicalization + // If we have to split PHI nodes of the entry or exit blocks, do so now. + severSplitPHINodesOfEntry(header); + + // canonicalization, after ret splitting + severSplitPHINodesOfExits(ExitBlocks); + + + if (KeepOldBlocks) { + for (BasicBlock *Block : Blocks) { + SmallVector Succs; + llvm::append_range(Succs, successors(Block) ); + + for (BasicBlock *Succ : Succs) { + if (Blocks.count(Succ)) continue; + + if (!Succ->getSinglePredecessor()) { + Succ= SplitEdge(Block, Succ, DT); + } + + // Ensure no PHI node in exit block (still possible with single predecessor, e.g. 
LCSSA) + while (auto P = dyn_cast(&Succ->front())) { + assert(P->getNumIncomingValues()==1); + P->replaceAllUsesWith(P->getIncomingValue(0)); + P->eraseFromParent(); + } + } + } + + + + ExitBlocks.clear(); + for (BasicBlock* Block : Blocks) { + for (BasicBlock* Succ : successors(Block)) { + if (Blocks.count(Succ)) continue; + + ExitBlocks.insert(Succ); + } + } + NumExitBlocks = ExitBlocks.size(); + } + + + // analyzis, after ret splitting - // DenseMap ExitingBlocks; + // DenseMap ExitingBlocks; for (BasicBlock *Block : Blocks) { Instruction *TI = Block->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { @@ -1451,18 +1474,12 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, continue; BasicBlock *OldTarget = TI->getSuccessor(i); OldTargets.push_back(OldTarget); - // ExitingBlocks[Block] = OldTarget; + // ExitingBlocks[Block] = OldTarget; } } - // canonicalization - // If we have to split PHI nodes of the entry or exit blocks, do so now. 
- severSplitPHINodesOfEntry(header); - - // canonicalization, after ret splitting - severSplitPHINodesOfExits(ExitBlocks); // analysis diff --git a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll index 8ee1c69038c98..eda65dc6269df 100644 --- a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll +++ b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll @@ -17,18 +17,18 @@ ; CHECK: newFuncRoot: ; CHECK: br label %if.split ; -; CHECK: then: +; CHECK: then: ; preds = %if.split ; CHECK: %tmp12 = shl i32 %arg1, 2 ; CHECK: %tmp13 = add nsw i32 %tmp12, %arg ; CHECK: br label %end.split ; -; CHECK: else: +; CHECK: else: ; preds = %if.split ; CHECK: %tmp22 = mul nsw i32 %arg, 3 ; CHECK: %tmp24 = sdiv i32 %arg1, 6 ; CHECK: %tmp25 = add nsw i32 %tmp24, %tmp22 ; CHECK: br label %end.split ; -; CHECK: if.split: +; CHECK: if.split: ; preds = %newFuncRoot ; CHECK: %tmp5 = icmp sgt i32 %arg, 0 ; CHECK: %tmp8 = icmp sgt i32 %arg1, 0 ; CHECK: %or.cond = and i1 %tmp5, %tmp8 @@ -39,11 +39,10 @@ ; CHECK: store i32 %tmp.0.ce, i32* %tmp.0.ce.out ; CHECK: br label %end.exitStub ; -; CHECK: end.exitStub: +; CHECK: end.exitStub: ; preds = %end.split ; CHECK: ret void ; CHECK: } - ; The second extracted function is the region composed by the blocks ; bb14 and bb20 from bar. 
; KILL-LABEL: define dso_local i1 @bar.bb14(i32 %arg1, i32 %arg, i32* %tmp25.out) { @@ -51,22 +50,24 @@ ; CHECK: newFuncRoot: ; CHECK: br label %bb14 ; -; CHECK: bb14: +; CHECK: bb14: ; preds = %newFuncRoot ; CHECK: %tmp0 = and i32 %arg1, %arg ; CHECK: %tmp1 = icmp slt i32 %tmp0, 0 ; CHECK: br i1 %tmp1, label %bb20, label %bb26.exitStub ; -; CHECK: bb20: +; CHECK: bb20: ; preds = %bb14 ; CHECK: %tmp22 = mul nsw i32 %arg, 3 ; CHECK: %tmp24 = sdiv i32 %arg1, 6 ; CHECK: %tmp25 = add nsw i32 %tmp24, %tmp22 ; CHECK: store i32 %tmp25, i32* %tmp25.out -; CHECK: br label %bb30.exitStub +; KILL: br label %bb30.exitStub +; KEEP: br label %bb20.split.exitStub ; -; CHECK: bb26.exitStub: +; CHECK: bb26.exitStub: ; preds = %bb14 ; CHECK: ret i1 true ; -; CHECK: bb30.exitStub: +; KILL: bb30.exitStub: ; preds = %bb20 +; KEEP: bb20.split.exitStub: ; CHECK: ret i1 false ; CHECK: } From 1cb62c450195a7c6d118b5f43d8ecc0f1f5094bb Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 05:10:06 -0600 Subject: [PATCH 019/130] WIP --- .../llvm-extract/extract-block-cleanup.ll | 75 ++++++++++++++----- 1 file changed, 55 insertions(+), 20 deletions(-) diff --git a/llvm/test/tools/llvm-extract/extract-block-cleanup.ll b/llvm/test/tools/llvm-extract/extract-block-cleanup.ll index 0d30cbbc65cea..971a48b67d579 100644 --- a/llvm/test/tools/llvm-extract/extract-block-cleanup.ll +++ b/llvm/test/tools/llvm-extract/extract-block-cleanup.ll @@ -1,29 +1,60 @@ ; RUN: llvm-extract -S -bb "foo:region_start;extractonly;cleanup;fallback;region_end" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s -; CHECK-LABEL: define void @foo( -; -; CHECK: outsideonly: +; CHECK-LABEL: define void @foo(i32* %arg, i1 %c) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %c, label %codeRepl, label %outsideonly +; CHECK-EMPTY: +; CHECK-NEXT: outsideonly: ; CHECK-NEXT: store i32 0, i32* %arg, align 4 ; CHECK-NEXT: br label %cleanup -; -; CHECK: codeRepl: -; CHECK-NEXT: call void @foo.region_start(i32* %arg) -; 
CHECK-NEXT: br label %return -; -; CHECK: extractonly: +; CHECK-EMPTY: +; CHECK-NEXT: codeRepl: +; CHECK-NEXT: %targetBlock = call i1 @foo.region_start(i32* %arg) +; CHECK-NEXT: br i1 %targetBlock, label %cleanup.return_crit_edge, label %region_end.split +; CHECK-EMPTY: +; CHECK-NEXT: region_start: +; CHECK-NEXT: br label %extractonly +; CHECK-EMPTY: +; CHECK-NEXT: extractonly: ; CHECK-NEXT: store i32 1, i32* %arg, align 4 ; CHECK-NEXT: br label %cleanup -; -; CHECK: cleanup: +; CHECK-EMPTY: +; CHECK-NEXT: cleanup: ; CHECK-NEXT: %dest = phi i8 [ 0, %outsideonly ], [ 1, %extractonly ] -; CHECK-NEXT: switch +; CHECK-NEXT: switch i8 %dest, label %fallback [ +; CHECK-NEXT: i8 0, label %cleanup.return_crit_edge +; CHECK-NEXT: i8 1, label %region_end +; CHECK-NEXT: ] +; CHECK-EMPTY: +; CHECK-NEXT: cleanup.return_crit_edge: +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: fallback: +; CHECK-NEXT: unreachable +; CHECK-EMPTY: +; CHECK-NEXT: region_end: +; CHECK-NEXT: br label %region_end.split +; CHECK-EMPTY: +; CHECK-NEXT: region_end.split: +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: outsidecont: +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: return: +; CHECK-NEXT: ret void +; CHECK-NEXT: } -; CHECK-LABEL: define internal void @foo.region_start(i32* %arg) { -; CHECK: br label %region_start -; -; CHECK: region_start: +; CHECK-LABEL: define internal i1 @foo.region_start(i32* %arg) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %newFuncRoot +; CHECK-EMPTY: +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label %region_start +; CHECK-EMPTY: +; CHECK-NEXT: region_start: ; CHECK-NEXT: br label %extractonly ; CHECK-EMPTY: ; CHECK-NEXT: extractonly: @@ -33,7 +64,7 @@ ; CHECK-NEXT: cleanup: ; CHECK-NEXT: %dest = phi i8 [ 1, %extractonly ] ; CHECK-NEXT: switch i8 %dest, label %fallback [ -; CHECK-NEXT: i8 0, label %return.exitStub +; CHECK-NEXT: i8 0, label %cleanup.return_crit_edge.exitStub ; CHECK-NEXT: i8 1, label %region_end ; 
CHECK-NEXT: ] ; CHECK-EMPTY: @@ -41,13 +72,17 @@ ; CHECK-NEXT: unreachable ; CHECK-EMPTY: ; CHECK-NEXT: region_end: -; CHECK-NEXT: br label %return.exitStub +; CHECK-NEXT: br label %region_end.split.exitStub ; CHECK-EMPTY: -; CHECK-NEXT: return.exitStub: -; CHECK-NEXT: ret void +; CHECK-NEXT: cleanup.return_crit_edge.exitStub: +; CHECK-NEXT: ret i1 true +; CHECK-EMPTY: +; CHECK-NEXT: region_end.split.exitStub: +; CHECK-NEXT: ret i1 false ; CHECK-NEXT: } + define void @foo(i32* %arg, i1 %c) { entry: br i1 %c, label %region_start, label %outsideonly From 2eb5feb0334042f13c6341eebb1db766144e821a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 05:18:09 -0600 Subject: [PATCH 020/130] WIP --- .../extract-block-multiple-exits.ll | 168 +++++++++++++----- 1 file changed, 128 insertions(+), 40 deletions(-) diff --git a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll index 21b20e1f0f713..25540200a15b1 100644 --- a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll +++ b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll @@ -1,54 +1,142 @@ -; RUN: llvm-extract -S -bb "foo:region_start;exiting0;exiting1" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s +; RUN: llvm-extract -S -bb "func:region_start;exiting0;exiting1" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s -; CHECK-LABEL: define void @foo( -; -; CHECK: outsideonly: -; CHECK-NEXT: store i32 0, i32* %arg, align 4 -; CHECK-NEXT: br label %cleanup -; -; CHECK: codeRepl: -; CHECK-NEXT: call void @foo.region_start(i32* %arg) -; CHECK-NEXT: br label %return -; -; CHECK: extractonly: -; CHECK-NEXT: store i32 1, i32* %arg, align 4 -; CHECK-NEXT: br label %cleanup -; -; CHECK: cleanup: -; CHECK-NEXT: %dest = phi i8 [ 0, %outsideonly ], [ 1, %extractonly ] -; CHECK-NEXT: switch - - -; CHECK-LABEL: define internal void @foo.region_start(i32* %arg) { -; CHECK: br label %region_start -; -; CHECK: 
region_start: -; CHECK-NEXT: br label %extractonly -; CHECK-EMPTY: -; CHECK-NEXT: extractonly: -; CHECK-NEXT: store i32 1, i32* %arg, align 4 -; CHECK-NEXT: br label %cleanup -; CHECK-EMPTY: -; CHECK-NEXT: cleanup: -; CHECK-NEXT: %dest = phi i8 [ 1, %extractonly ] +; CHECK-LABEL: define void @func(i32* %arg, i1 %c0, i1 %c1, i1 %c2, i8 %dest) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %B.ce.loc = alloca i32, align 4 +; CHECK-NEXT: %c.loc = alloca i32, align 4 +; CHECK-NEXT: %b.loc = alloca i32, align 4 +; CHECK-NEXT: %a.loc = alloca i32, align 4 +; CHECK-NEXT: br i1 %c0, label %codeRepl, label %exit +; CHECK-EMPTY: +; CHECK-NEXT: codeRepl: +; CHECK-NEXT: %targetBlock = call i16 @func.region_start(i1 %c1, i1 %c2, i8 %dest, i32* %a.loc, i32* %b.loc, i32* %c.loc, i32* %B.ce.loc) +; CHECK-NEXT: %a.reload = load i32, i32* %a.loc, align 4 +; CHECK-NEXT: %b.reload = load i32, i32* %b.loc, align 4 +; CHECK-NEXT: %c.reload = load i32, i32* %c.loc, align 4 +; CHECK-NEXT: %B.ce.reload = load i32, i32* %B.ce.loc, align 4 +; CHECK-NEXT: switch i16 %targetBlock, label %exit0 [ +; CHECK-NEXT: i16 0, label %exiting0.exit_crit_edge +; CHECK-NEXT: i16 1, label %fallback +; CHECK-NEXT: i16 2, label %exit1 +; CHECK-NEXT: i16 3, label %exit2 +; CHECK-NEXT: ] +; CHECK-EMPTY: +; CHECK-NEXT: region_start: +; CHECK-NEXT: %a = add i32 42, 1 +; CHECK-NEXT: br i1 %c1, label %exiting0, label %exiting1 +; CHECK-EMPTY: +; CHECK-NEXT: exiting0: +; CHECK-NEXT: %b = add i32 42, 2 +; CHECK-NEXT: br i1 %c2, label %exiting0.exit_crit_edge, label %exit0.split +; CHECK-EMPTY: +; CHECK-NEXT: exiting0.exit_crit_edge: +; CHECK-NEXT: %b.merge_with_extracted4 = phi i32 [ %b.reload, %codeRepl ], [ %b, %exiting0 ] +; CHECK-NEXT: br label %exit +; CHECK-EMPTY: +; CHECK-NEXT: exiting1: +; CHECK-NEXT: %c = add i32 42, 3 ; CHECK-NEXT: switch i8 %dest, label %fallback [ -; CHECK-NEXT: i8 0, label %return.exitStub -; CHECK-NEXT: i8 1, label %region_end +; CHECK-NEXT: i8 0, label %exit0.split +; CHECK-NEXT: i8 1, label 
%exit1 +; CHECK-NEXT: i8 2, label %exit2 +; CHECK-NEXT: i8 3, label %exit0.split ; CHECK-NEXT: ] ; CHECK-EMPTY: -; CHECK-NEXT: fallback: +; CHECK-NEXT: fallback: ; CHECK-NEXT: unreachable ; CHECK-EMPTY: -; CHECK-NEXT: region_end: -; CHECK-NEXT: br label %return.exitStub +; CHECK-NEXT: exit: +; CHECK-NEXT: %A = phi i32 [ 42, %entry ], [ %b.merge_with_extracted4, %exiting0.exit_crit_edge ] +; CHECK-NEXT: store i32 %A, i32* %arg, align 4 +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: exit0.split: +; CHECK-NEXT: %b.merge_with_extracted3 = phi i32 [ %b, %exiting0 ], [ undef, %exiting1 ], [ undef, %exiting1 ] +; CHECK-NEXT: %B.ce = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ], [ %a, %exiting1 ] +; CHECK-NEXT: br label %exit0 +; CHECK-EMPTY: +; CHECK-NEXT: exit0: +; CHECK-NEXT: %B.ce.merge_with_extracted = phi i32 [ %B.ce.reload, %codeRepl ], [ %B.ce, %exit0.split ] +; CHECK-NEXT: %b.merge_with_extracted = phi i32 [ %b.reload, %codeRepl ], [ %b.merge_with_extracted3, %exit0.split ] +; CHECK-NEXT: %a.merge_with_extracted2 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exit0.split ] +; CHECK-NEXT: store i32 %a.merge_with_extracted2, i32* %arg, align 4 +; CHECK-NEXT: store i32 %B.ce.merge_with_extracted, i32* %arg, align 4 +; CHECK-NEXT: br label %after +; CHECK-EMPTY: +; CHECK-NEXT: exit1: +; CHECK-NEXT: %c.merge_with_extracted5 = phi i32 [ %c.reload, %codeRepl ], [ %c, %exiting1 ] +; CHECK-NEXT: %a.merge_with_extracted1 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exiting1 ] +; CHECK-NEXT: br label %after +; CHECK-EMPTY: +; CHECK-NEXT: exit2: +; CHECK-NEXT: %c.merge_with_extracted = phi i32 [ %c.reload, %codeRepl ], [ %c, %exiting1 ] +; CHECK-NEXT: store i32 %c.merge_with_extracted, i32* %arg, align 4 +; CHECK-NEXT: store i32 %c.merge_with_extracted, i32* %arg, align 4 +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: after: +; CHECK-NEXT: %a.merge_with_extracted = phi i32 [ %a.merge_with_extracted2, %exit0 ], [ %a.merge_with_extracted1, %exit1 ] +; 
CHECK-NEXT: %D = phi i32 [ %b.merge_with_extracted, %exit0 ], [ %c.merge_with_extracted5, %exit1 ] +; CHECK-NEXT: store i32 %a.merge_with_extracted, i32* %arg, align 4 +; CHECK-NEXT: store i32 %D, i32* %arg, align 4 +; CHECK-NEXT: br label %return ; CHECK-EMPTY: -; CHECK-NEXT: return.exitStub: +; CHECK-NEXT: return: ; CHECK-NEXT: ret void ; CHECK-NEXT: } -define void @foo(i32* %arg, i1 %c0, i1 %c1, i1 %c2, i8 %dest) { +; CHECK-LABEL: define internal i16 @func.region_start(i1 %c1, i1 %c2, i8 %dest, i32* %a.out, i32* %b.out, i32* %c.out, i32* %B.ce.out) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %newFuncRoot +; CHECK-EMPTY: +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label %region_start +; CHECK-EMPTY: +; CHECK-NEXT: region_start: +; CHECK-NEXT: %a = add i32 42, 1 +; CHECK-NEXT: store i32 %a, i32* %a.out, align 4 +; CHECK-NEXT: br i1 %c1, label %exiting0, label %exiting1 +; CHECK-EMPTY: +; CHECK-NEXT: exiting0: +; CHECK-NEXT: %b = add i32 42, 2 +; CHECK-NEXT: store i32 %b, i32* %b.out, align 4 +; CHECK-NEXT: br i1 %c2, label %exiting0.exit_crit_edge.exitStub, label %exit0.split +; CHECK-EMPTY: +; CHECK-NEXT: exiting1: +; CHECK-NEXT: %c = add i32 42, 3 +; CHECK-NEXT: store i32 %c, i32* %c.out, align 4 +; CHECK-NEXT: switch i8 %dest, label %fallback.exitStub [ +; CHECK-NEXT: i8 0, label %exit0.split +; CHECK-NEXT: i8 1, label %exit1.exitStub +; CHECK-NEXT: i8 2, label %exit2.exitStub +; CHECK-NEXT: i8 3, label %exit0.split +; CHECK-NEXT: ] +; CHECK-EMPTY: +; CHECK-NEXT: exit0.split: +; CHECK-NEXT: %B.ce = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ], [ %a, %exiting1 ] +; CHECK-NEXT: store i32 %B.ce, i32* %B.ce.out, align 4 +; CHECK-NEXT: br label %exit0.exitStub +; CHECK-EMPTY: +; CHECK-NEXT: exiting0.exit_crit_edge.exitStub: +; CHECK-NEXT: ret i16 0 +; CHECK-EMPTY: +; CHECK-NEXT: fallback.exitStub: +; CHECK-NEXT: ret i16 1 +; CHECK-EMPTY: +; CHECK-NEXT: exit1.exitStub: +; CHECK-NEXT: ret i16 2 +; CHECK-EMPTY: +; CHECK-NEXT: exit2.exitStub: +; CHECK-NEXT: ret 
i16 3 +; CHECK-EMPTY: +; CHECK-NEXT: exit0.exitStub: +; CHECK-NEXT: ret i16 4 +; CHECK-NEXT: } + + +define void @func(i32* %arg, i1 %c0, i1 %c1, i1 %c2, i8 %dest) { entry: br i1 %c0, label %region_start, label %exit @@ -78,7 +166,7 @@ exit: br label %return exit0: - %B = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ] , [ %a, %exiting1 ] ; Not working without --bb-keep-blocks (different incoming value after %exiting0 and %exiting1 is replace by codeReplacer) + %B = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ] , [ %a, %exiting1 ] store i32 %a, i32* %arg store i32 %B, i32* %arg br label %after From 46b8d94a291eefc5b07bcfa21b8315ee59a719cf Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 05:31:24 -0600 Subject: [PATCH 021/130] WIP --- llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp index 5f7b0111c1c62..cc7635086d4ec 100644 --- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -181,13 +181,13 @@ TEST(CodeExtractor, ExitBlockOrderingPhis) { ReturnInst *FirstReturn = dyn_cast(FirstTerm); EXPECT_TRUE(FirstReturn); ConstantInt *CIFirst = dyn_cast(FirstReturn->getReturnValue()); - EXPECT_TRUE(CIFirst->getLimitedValue() == 1u); + EXPECT_TRUE(CIFirst->getLimitedValue() == 0u); Instruction *NextTerm = NextExitStub->getTerminator(); ReturnInst *NextReturn = dyn_cast(NextTerm); EXPECT_TRUE(NextReturn); ConstantInt *CINext = dyn_cast(NextReturn->getReturnValue()); - EXPECT_TRUE(CINext->getLimitedValue() == 0u); + EXPECT_TRUE(CINext->getLimitedValue() == 1u); EXPECT_FALSE(verifyFunction(*Outlined)); EXPECT_FALSE(verifyFunction(*Func)); From ef5ecdafe85d74e53553c6ab4e0caf3767e51d8b Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 17:13:02 -0600 Subject: [PATCH 022/130] WIP --- 
.../llvm/Transforms/Utils/CodeExtractor.h | 2 + llvm/lib/Transforms/Utils/CodeExtractor.cpp | 37 +++++++++++++------ .../Transforms/Utils/CodeExtractorTest.cpp | 4 +- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index bd2015fafeacc..c333ab25ec7f1 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -107,6 +107,7 @@ class CodeExtractorAnalysisCache { // the function. SmallVector OldTargets; // SmallVector > OldExitingEdges; + SmallPtrSet ExitBlocks; // Suffix to use when creating extracted function (appended to the original // function name + "."). If empty, the default is to use the entry block @@ -114,6 +115,7 @@ class CodeExtractorAnalysisCache { std::string Suffix; + void recomputeExitBlocks(); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 8095f453746c6..b93ba97fd1b08 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1368,6 +1368,23 @@ static void applyFirstDebugLoc(Function *oldFunction, ArrayRef Bloc } } + +void CodeExtractor::recomputeExitBlocks() { + OldTargets.clear(); + ExitBlocks.clear(); + + + for (BasicBlock* Block : Blocks) { + for (BasicBlock* Succ : successors(Block)) { + if (Blocks.count(Succ)) continue; + + ExitBlocks.insert(Succ); + OldTargets.push_back(Succ); + } + } + NumExitBlocks = ExitBlocks.size(); +} + Function * CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs, bool KeepOldBlocks ) { @@ -1383,7 +1400,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BlockFrequency EntryFreq; DenseMap ExitWeights; - SmallPtrSet ExitBlocks; + // SmallPtrSet ExitBlocks; // analyzeBeforeExtraction(CEAC,inputs, outputs, EntryFreq,ExitWeights,ExitBlocks); @@ 
-1404,6 +1421,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // analysis, after ret splitting + + + for (BasicBlock *Block : Blocks) { for (BasicBlock *Succ : successors(Block)) { if (!Blocks.count(Succ)) { @@ -1412,13 +1432,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BlockFrequency &BF = ExitWeights[Succ]; BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); } - ExitBlocks.insert(Succ); + // ExitBlocks.insert(Succ); } } } - NumExitBlocks = ExitBlocks.size(); + // NumExitBlocks = ExitBlocks.size(); + recomputeExitBlocks(); // canonicalization @@ -1452,15 +1473,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - ExitBlocks.clear(); - for (BasicBlock* Block : Blocks) { - for (BasicBlock* Succ : successors(Block)) { - if (Blocks.count(Succ)) continue; - - ExitBlocks.insert(Succ); - } - } - NumExitBlocks = ExitBlocks.size(); + recomputeExitBlocks(); } diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp index cc7635086d4ec..5f7b0111c1c62 100644 --- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -181,13 +181,13 @@ TEST(CodeExtractor, ExitBlockOrderingPhis) { ReturnInst *FirstReturn = dyn_cast(FirstTerm); EXPECT_TRUE(FirstReturn); ConstantInt *CIFirst = dyn_cast(FirstReturn->getReturnValue()); - EXPECT_TRUE(CIFirst->getLimitedValue() == 0u); + EXPECT_TRUE(CIFirst->getLimitedValue() == 1u); Instruction *NextTerm = NextExitStub->getTerminator(); ReturnInst *NextReturn = dyn_cast(NextTerm); EXPECT_TRUE(NextReturn); ConstantInt *CINext = dyn_cast(NextReturn->getReturnValue()); - EXPECT_TRUE(CINext->getLimitedValue() == 1u); + EXPECT_TRUE(CINext->getLimitedValue() == 0u); EXPECT_FALSE(verifyFunction(*Outlined)); EXPECT_FALSE(verifyFunction(*Func)); From 5e351a62aeb44e2cb0442c229320a6af6e651c70 Mon Sep 17 00:00:00 2001 From: 
Michael Kruse Date: Sat, 4 Dec 2021 17:28:32 -0600 Subject: [PATCH 023/130] cleanup unused --- llvm/include/llvm/Transforms/Utils/CodeExtractor.h | 2 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index c333ab25ec7f1..fcd6e94f9e34f 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -250,7 +250,7 @@ class CodeExtractorAnalysisCache { const ValueSet &outputs) ; - Function *constructFunction2(const ValueSet &inputs, + Function *constructFunction(const ValueSet &inputs, const ValueSet &outputs, BasicBlock *header, // BasicBlock *&newRootNode, BasicBlock *newHeader, diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index b93ba97fd1b08..48205a34ffee8 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -823,7 +823,7 @@ void CodeExtractor::splitReturnBlocks() { } -Function *CodeExtractor::constructFunction2(const ValueSet &inputs, +Function *CodeExtractor::constructFunction(const ValueSet &inputs, const ValueSet &outputs, BasicBlock *header, // BasicBlock *&newRootNode, BasicBlock *newHeader, @@ -1528,7 +1528,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, #endif // Construct new function based on inputs/outputs & add allocas for all defs. 
- Function *newFunction = constructFunction2(inputs, outputs, header, oldFunction, oldFunction->getParent()); + Function *newFunction = constructFunction(inputs, outputs, header, oldFunction, oldFunction->getParent()); // The new function needs a root node because other nodes can branch to the @@ -1819,11 +1819,11 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, #else Module *M = newFunction->getParent(); LLVMContext &Context = M->getContext(); - const DataLayout &DL = M->getDataLayout(); + // const DataLayout &DL = M->getDataLayout(); // TOOD: Pass AllocaBlock - BasicBlock * AllocaBlock = &codeReplacer->getParent()->front(); + // BasicBlock * AllocaBlock = &codeReplacer->getParent()->front(); @@ -2239,7 +2239,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE // Module *M = newFunction->getParent(); LLVMContext& Context = M->getContext(); - const DataLayout& DL = M->getDataLayout(); + // const DataLayout& DL = M->getDataLayout(); CallInst* call = nullptr; #if 0 @@ -2394,7 +2394,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE codeReplacer, 0, codeReplacer); - auto newFuncIt = newFunction->front().getIterator(); + // auto newFuncIt = newFunction->front().getIterator(); for (BasicBlock* Block : Blocks) { BasicBlock* CBB = CloneBasicBlock(Block, VMap, {}, newFunction /*, nullptr, &DIFinder*/); @@ -2564,7 +2564,7 @@ void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CE } - for (auto&& O : outputs) { } + // for (auto&& O : outputs) { } From f5869dab7f227d18ac53ff612c7fba88bd3cecd9 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 17:38:15 -0600 Subject: [PATCH 024/130] Working on BFI --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 
48205a34ffee8..1bea4538a4203 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1404,8 +1404,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // analyzeBeforeExtraction(CEAC,inputs, outputs, EntryFreq,ExitWeights,ExitBlocks); - // Calculate the entry frequency of the new function before we change the root - // block. + + + // canonicalization + prepareForExtraction(KeepOldBlocks); + + + // Calculate the entry frequency of the new function before we change the root + // block. if (BFI) { assert(BPI && "Both BPI and BFI are required to preserve profile info"); for (BasicBlock *Pred : predecessors(header)) { @@ -1416,10 +1422,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } - // canonicalization - prepareForExtraction(KeepOldBlocks); - - // analysis, after ret splitting From f4a104305c594ef837e3ae4ee24ccbe6c8080990 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 17:43:01 -0600 Subject: [PATCH 025/130] Working on BFI --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 41 +++++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 1bea4538a4203..f0eebccc295e2 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1398,8 +1398,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Function *oldFunction = header->getParent(); - BlockFrequency EntryFreq; - DenseMap ExitWeights; + // SmallPtrSet ExitBlocks; // analyzeBeforeExtraction(CEAC,inputs, outputs, EntryFreq,ExitWeights,ExitBlocks); @@ -1410,8 +1409,12 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, prepareForExtraction(KeepOldBlocks); + recomputeExitBlocks(); + // Calculate the entry frequency of the new function before we change the root // block. 
+ BlockFrequency EntryFreq; + DenseMap ExitWeights; if (BFI) { assert(BPI && "Both BPI and BFI are required to preserve profile info"); for (BasicBlock *Pred : predecessors(header)) { @@ -1419,6 +1422,24 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, continue; EntryFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); } + + for (BasicBlock *Succ : ExitBlocks) { + for (BasicBlock *Block : predecessors(Succ)) { + if (!Blocks.count(Block)) continue; + + // for (BasicBlock *Block : Blocks) { + // for (BasicBlock *Succ : successors(Block)) { + // if (!Blocks.count(Succ)) { + // Update the branch weight for this successor. + // if (BFI) { + BlockFrequency &BF = ExitWeights[Succ]; + BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); + // } + // ExitBlocks.insert(Succ); + // } + } + } + // NumExitBlocks = ExitBlocks.size(); } @@ -1426,19 +1447,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - for (BasicBlock *Block : Blocks) { - for (BasicBlock *Succ : successors(Block)) { - if (!Blocks.count(Succ)) { - // Update the branch weight for this successor. 
- if (BFI) { - BlockFrequency &BF = ExitWeights[Succ]; - BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); - } - // ExitBlocks.insert(Succ); - } - } - } - // NumExitBlocks = ExitBlocks.size(); + recomputeExitBlocks(); @@ -1667,7 +1676,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (!AggregateArgs) std::advance(OutputArgBegin, inputs.size()); - using InsertPointTy = IRBuilder<>::InsertPoint; + //using InsertPointTy = IRBuilder<>::InsertPoint; IRBuilder<> Builder(Context); auto MakeReloadAddress = [&](int i) { Value *Output = nullptr; From c121f72d6ae6f2db9bed1a6d79521d4d2836b4ed Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 18:17:03 -0600 Subject: [PATCH 026/130] Working on BFI --- .../llvm/Transforms/Utils/CodeExtractor.h | 3 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 132 ++++++++---------- 2 files changed, 63 insertions(+), 72 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index fcd6e94f9e34f..a02b74c7c7161 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -258,10 +258,9 @@ class CodeExtractorAnalysisCache { //, bool KeepOldBlocks, ValueToValueMapTy &VMap ); - void analyzeBeforeExtraction(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs, BlockFrequency &EntryFreq,DenseMap &ExitWeights, SmallPtrSet &ExitBlocks); - void prepareForExtraction(bool KeepOldBlocks); + void prepareForExtraction(BasicBlock *&Header, bool KeepOldBlocks); void extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index f0eebccc295e2..cda730bbd0ee5 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -52,7 +52,6 @@ #include "llvm/IR/User.h" #include 
"llvm/IR/Value.h" #include "llvm/IR/Verifier.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" @@ -804,9 +803,12 @@ void CodeExtractor::severSplitPHINodesOfExits( void CodeExtractor::splitReturnBlocks() { for (BasicBlock *Block : Blocks) - if (ReturnInst *RI = dyn_cast(Block->getTerminator())) { - BasicBlock *New = - Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); + if (ReturnInst* RI = dyn_cast(Block->getTerminator())) { + BasicBlock* New = + Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); + + + if (DT) { // Old dominates New. New node dominates all other nodes dominated // by Old. @@ -819,6 +821,14 @@ void CodeExtractor::splitReturnBlocks() { for (DomTreeNode *I : Children) DT->changeImmediateDominator(I, NewNode); } + + if (BFI) { + BFI->setBlockFreq(New, BFI->getBlockFreq(Block).getFrequency()); + } + if (BPI) { + // BPI->getEdgeProbability() + // BPI->setEdgeProbability(); + } } } @@ -1303,46 +1313,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, bool Ke -void CodeExtractor::analyzeBeforeExtraction( - const CodeExtractorAnalysisCache& CEAC, ValueSet& inputs, ValueSet& outputs, - BlockFrequency& EntryFreq, - DenseMap &ExitWeights, SmallPtrSet &ExitBlocks) { - BasicBlock *header = *Blocks.begin(); - // Function *oldFunction = header->getParent(); - - // Calculate the entry frequency of the new function before we change the root - // block. - if (BFI) { - assert(BPI && "Both BPI and BFI are required to preserve profile info"); - for (BasicBlock *Pred : predecessors(header)) { - if (Blocks.count(Pred)) - continue; - EntryFreq += - BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); - } - } - - // Calculate the exit blocks for the extracted region and the total exit - // weights for each of those blocks. 
- - for (BasicBlock *Block : Blocks) { - for (BasicBlock *Succ : successors(Block)) { - if (!Blocks.count(Succ)) { - // Update the branch weight for this successor. - if (BFI) { - BlockFrequency &BF = ExitWeights[Succ]; - BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); - } - ExitBlocks.insert(Succ); - } - } - } - NumExitBlocks = ExitBlocks.size(); -} - - -void CodeExtractor::prepareForExtraction(bool KeepOldBlocks) { +void CodeExtractor::prepareForExtraction(BasicBlock *&Header,bool KeepOldBlocks) { // BasicBlock *header = *Blocks.begin(); // Function *oldFunction = header->getParent(); @@ -1350,6 +1322,11 @@ void CodeExtractor::prepareForExtraction(bool KeepOldBlocks) { // that the return is not in the region. splitReturnBlocks(); + // canonicalization + // If we have to split PHI nodes of the entry or exit blocks, do so now. + severSplitPHINodesOfEntry(Header); + + recomputeExitBlocks(); } @@ -1399,17 +1376,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + // SmallPtrSet ExitBlocks; // analyzeBeforeExtraction(CEAC,inputs, outputs, EntryFreq,ExitWeights,ExitBlocks); + // canonicalization - prepareForExtraction(KeepOldBlocks); + prepareForExtraction(header, KeepOldBlocks); + - recomputeExitBlocks(); // Calculate the entry frequency of the new function before we change the root // block. @@ -1427,22 +1406,24 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, for (BasicBlock *Block : predecessors(Succ)) { if (!Blocks.count(Block)) continue; - // for (BasicBlock *Block : Blocks) { - // for (BasicBlock *Succ : successors(Block)) { - // if (!Blocks.count(Succ)) { - // Update the branch weight for this successor. 
- // if (BFI) { - BlockFrequency &BF = ExitWeights[Succ]; - BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); - // } - // ExitBlocks.insert(Succ); - // } + // for (BasicBlock *Block : Blocks) { + // for (BasicBlock *Succ : successors(Block)) { + // if (!Blocks.count(Succ)) { + // Update the branch weight for this successor. + // if (BFI) { + BlockFrequency &BF = ExitWeights[Succ]; + BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); + // } + // ExitBlocks.insert(Succ); + // } } } // NumExitBlocks = ExitBlocks.size(); } + + // analysis, after ret splitting @@ -1450,12 +1431,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - recomputeExitBlocks(); + // recomputeExitBlocks(); + - // canonicalization - // If we have to split PHI nodes of the entry or exit blocks, do so now. - severSplitPHINodesOfEntry(header); // canonicalization, after ret splitting severSplitPHINodesOfExits(ExitBlocks); @@ -1553,7 +1532,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); auto newHeader = codeReplacer; - IRBuilder<> CodeReplacerBuilder(codeReplacer); + // IRBuilder<> CodeReplacerBuilder(codeReplacer); ValueToValueMapTy VMap; @@ -1676,24 +1655,25 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (!AggregateArgs) std::advance(OutputArgBegin, inputs.size()); +#if 0 //using InsertPointTy = IRBuilder<>::InsertPoint; - IRBuilder<> Builder(Context); + // IRBuilder<> Builder(Context); auto MakeReloadAddress = [&](int i) { Value *Output = nullptr; if (AggregateArgs) { Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - Value *GEP = Builder.CreateGEP(StructArgTy, Struct, Idx, Twine("gep_reload_") + outputs[i]->getName()); - //GetElementPtrInst *GEP = 
GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - //codeReplacer->getInstList().push_back(GEP); + //Value *GEP = Builder.CreateGEP(StructArgTy, Struct, Idx, Twine("gep_reload_") + outputs[i]->getName()); + GetElementPtrInst *GEP = GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); Output = GEP; } else { Output = ReloadOutputs[i]; } return Output; }; - +#endif #if 0 // Undo SSA for output values after the extracted region before dominator analysis is invalidated. @@ -1742,7 +1722,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, SwiftErrorArgs,ReloadOutputs, Reloads, StructArgTy, Struct, - MakeReloadAddress + {} ); } else { // Transforms/HotColdSplit/stale-assume-in-original-func.ll @@ -1868,11 +1848,23 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Reload the outputs passed in by reference. - Builder.SetInsertPoint(codeReplacer); + // Builder.SetInsertPoint(codeReplacer); for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - // LoadInst *load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload",codeReplacer); - auto Output = MakeReloadAddress(i); - LoadInst *load = Builder.CreateLoad(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload"); + Value *Output = nullptr; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + //Value *GEP = Builder.CreateGEP(StructArgTy, Struct, Idx, Twine("gep_reload_") + outputs[i]->getName()); + GetElementPtrInst *GEP = GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } else { + Output = ReloadOutputs[i]; + } + LoadInst *load = new LoadInst(outputs[i]->getType(), Output, 
outputs[i]->getName() + ".reload",codeReplacer); + // auto Output = MakeReloadAddress(i); + // LoadInst *load = Builder.CreateLoad(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload"); Reloads.push_back(load); std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); From 4b29ca0332b7d33783403554d50342fbf5c98530 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 18:22:10 -0600 Subject: [PATCH 027/130] severSplitPHINodesOfExits --- llvm/include/llvm/Transforms/Utils/CodeExtractor.h | 2 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index a02b74c7c7161..77ceb07fde109 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -240,7 +240,7 @@ class CodeExtractorAnalysisCache { Instruction *Addr, BasicBlock *ExitBlock) const; void severSplitPHINodesOfEntry(BasicBlock *&Header); - void severSplitPHINodesOfExits(const SmallPtrSetImpl &Exits); + void severSplitPHINodesOfExits(); void splitReturnBlocks(); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index cda730bbd0ee5..20439e6e3f65a 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -756,9 +756,8 @@ void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) { /// outlined region, we split these PHIs on two: one with inputs from region /// and other with remaining incoming blocks; then first PHIs are placed in /// outlined region. 
-void CodeExtractor::severSplitPHINodesOfExits( - const SmallPtrSetImpl &Exits) { - for (BasicBlock *ExitBB : Exits) { +void CodeExtractor::severSplitPHINodesOfExits() { + for (BasicBlock *ExitBB : ExitBlocks) { BasicBlock *NewBB = nullptr; for (PHINode &PN : ExitBB->phis()) { @@ -1328,6 +1327,8 @@ void CodeExtractor::prepareForExtraction(BasicBlock *&Header,bool KeepOldBlocks) recomputeExitBlocks(); + severSplitPHINodesOfExits(); + // recomputeExitBlocks(); } @@ -1362,6 +1363,8 @@ void CodeExtractor::recomputeExitBlocks() { NumExitBlocks = ExitBlocks.size(); } + + Function * CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs, bool KeepOldBlocks ) { @@ -1437,7 +1440,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // canonicalization, after ret splitting - severSplitPHINodesOfExits(ExitBlocks); + if (KeepOldBlocks) { From 2d7ffba121ac43dc661036e167a1a9cd1bf5ae21 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 18:28:29 -0600 Subject: [PATCH 028/130] OldTargets --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 57 +++++++++++---------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 20439e6e3f65a..66330251631a0 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1330,6 +1330,34 @@ void CodeExtractor::prepareForExtraction(BasicBlock *&Header,bool KeepOldBlocks) severSplitPHINodesOfExits(); // recomputeExitBlocks(); + + if (KeepOldBlocks) { + // TODO: preserve BPI/BFI + for (BasicBlock *Block : Blocks) { + SmallVector Succs; + llvm::append_range(Succs, successors(Block) ); + + for (BasicBlock *Succ : Succs) { + if (Blocks.count(Succ)) continue; + + if (!Succ->getSinglePredecessor()) { + Succ= SplitEdge(Block, Succ, DT); + } + + // Ensure no PHI node in exit block (still possible with single 
predecessor, e.g. LCSSA) + while (auto P = dyn_cast(&Succ->front())) { + assert(P->getNumIncomingValues()==1); + P->replaceAllUsesWith(P->getIncomingValue(0)); + P->eraseFromParent(); + } + } + } + + + + recomputeExitBlocks(); + } + } @@ -1443,34 +1471,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - if (KeepOldBlocks) { - for (BasicBlock *Block : Blocks) { - SmallVector Succs; - llvm::append_range(Succs, successors(Block) ); - - for (BasicBlock *Succ : Succs) { - if (Blocks.count(Succ)) continue; - - if (!Succ->getSinglePredecessor()) { - Succ= SplitEdge(Block, Succ, DT); - } - - // Ensure no PHI node in exit block (still possible with single predecessor, e.g. LCSSA) - while (auto P = dyn_cast(&Succ->front())) { - assert(P->getNumIncomingValues()==1); - P->replaceAllUsesWith(P->getIncomingValue(0)); - P->eraseFromParent(); - } - } - } - - - - recomputeExitBlocks(); - } - +#if 0 // analyzis, after ret splitting // DenseMap ExitingBlocks; for (BasicBlock *Block : Blocks) { @@ -1483,7 +1486,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // ExitingBlocks[Block] = OldTarget; } } - +#endif From c730d393dcd94fdfcf3328b1d3597d2946e11da8 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 18:47:06 -0600 Subject: [PATCH 029/130] Trying to inline extractCodeRegionByCopy --- .../llvm/Transforms/Utils/CodeExtractor.h | 12 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 114 +++++------------- 2 files changed, 34 insertions(+), 92 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 77ceb07fde109..6be332813e6a8 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -250,17 +250,15 @@ class CodeExtractorAnalysisCache { const ValueSet &outputs) ; - Function *constructFunction(const ValueSet &inputs, - const ValueSet &outputs, - BasicBlock *header, - // 
BasicBlock *&newRootNode, BasicBlock *newHeader, - Function *oldFunction, Module *M - //, bool KeepOldBlocks, ValueToValueMapTy &VMap + Function *constructFunctionDeclaration(const ValueSet &inputs, const ValueSet &outputs, + BasicBlock *header ); + - void prepareForExtraction(BasicBlock *&Header, bool KeepOldBlocks); + + void canonicalizeForExtraction(BasicBlock *&Header, bool NoExitBlockPHIs); void extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 66330251631a0..cbce57ca86d05 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -831,16 +831,15 @@ void CodeExtractor::splitReturnBlocks() { } } - -Function *CodeExtractor::constructFunction(const ValueSet &inputs, - const ValueSet &outputs, - BasicBlock *header, - // BasicBlock *&newRootNode, BasicBlock *newHeader, - Function *oldFunction, Module *M//, bool KeepOldBlocks, ValueToValueMapTy &VMap -){ +/// constructFunction - make a function based on inputs and outputs, as follows: +/// f(in0, ..., inN, out0, ..., outN) +Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, const ValueSet &outputs, BasicBlock *header){ LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); + Function *oldFunction = header->getParent(); + Module *M = oldFunction->getParent(); + // This function returns unsigned, outputs will go back by reference. switch (NumExitBlocks) { case 0: @@ -1011,6 +1010,14 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, } + // Set names for input and output arguments. 
+ if (!AggregateArgs) { + Function::arg_iterator AI = newFunction->arg_begin(); + for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) + AI->setName(inputs[i]->getName()); + for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) + AI->setName(outputs[i]->getName()+".out"); + } return newFunction; } @@ -1313,7 +1320,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, bool Ke -void CodeExtractor::prepareForExtraction(BasicBlock *&Header,bool KeepOldBlocks) { +void CodeExtractor::canonicalizeForExtraction(BasicBlock *&Header,bool NoExitBlockPHIs) { // BasicBlock *header = *Blocks.begin(); // Function *oldFunction = header->getParent(); @@ -1331,7 +1338,7 @@ void CodeExtractor::prepareForExtraction(BasicBlock *&Header,bool KeepOldBlocks) // recomputeExitBlocks(); - if (KeepOldBlocks) { + if (NoExitBlockPHIs) { // TODO: preserve BPI/BFI for (BasicBlock *Block : Blocks) { SmallVector Succs; @@ -1416,7 +1423,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // canonicalization - prepareForExtraction(header, KeepOldBlocks); + canonicalizeForExtraction(header, KeepOldBlocks); @@ -1437,61 +1444,18 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, for (BasicBlock *Block : predecessors(Succ)) { if (!Blocks.count(Block)) continue; - // for (BasicBlock *Block : Blocks) { - // for (BasicBlock *Succ : successors(Block)) { - // if (!Blocks.count(Succ)) { - // Update the branch weight for this successor. 
- // if (BFI) { BlockFrequency &BF = ExitWeights[Succ]; BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); - // } - // ExitBlocks.insert(Succ); - // } } } - // NumExitBlocks = ExitBlocks.size(); } - // analysis, after ret splitting - - - - - - - // recomputeExitBlocks(); - - - - - // canonicalization, after ret splitting - - - - - -#if 0 - // analyzis, after ret splitting - // DenseMap ExitingBlocks; - for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock *OldTarget = TI->getSuccessor(i); - OldTargets.push_back(OldTarget); - // ExitingBlocks[Block] = OldTarget; - } - } -#endif - - // analysis ValueSet SinkingCands, HoistingCands; BasicBlock *CommonExit = nullptr; findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); @@ -1504,27 +1468,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, findInputsOutputs(inputs, outputs, SinkingCands); - - -#if 0 - DenseMap> ExitValues; - for (auto&& O : outputs) { - auto &&I = cast(O); - for (auto &&U : I->uses()) { - auto User = dyn_cast(U.getUser()); - if (!User) continue; - if (Blocks.count(User->getParent())) continue; - - for (auto &&E : ExitBlocks) { - if (DT->dominates(E, User->getParent()) - ExitValues[E].push_back(cast O); - } - } - } -#endif // Construct new function based on inputs/outputs & add allocas for all defs. 
- Function *newFunction = constructFunction(inputs, outputs, header, oldFunction, oldFunction->getParent()); + Function *newFunction = constructFunctionDeclaration(inputs, outputs, header); + // The new function needs a root node because other nodes can branch to the @@ -1538,18 +1485,17 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); auto newHeader = codeReplacer; - // IRBuilder<> CodeReplacerBuilder(codeReplacer); - ValueToValueMapTy VMap; - StructType *StructTy = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - //StructTy = StructType::get(M->getContext(), paramTy); - StructTy = cast(newFunction->getArg(0)->getType()); - } + StructType *StructTy = nullptr; + if (AggregateArgs && newFunction->arg_size() > 0) + StructTy = cast(newFunction->getArg(0)->getType()); + + + // Create an iterator to name all of the arguments we inserted. Function::arg_iterator AI = newFunction->arg_begin(); @@ -1573,7 +1519,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, NewValues.push_back(RewriteVal); } - +#if 0 // Set names for input and output arguments. if (!AggregateArgs) { AI = newFunction->arg_begin(); @@ -1582,6 +1528,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) AI->setName(outputs[i]->getName()+".out"); } +#endif std::vector ReloadOutputs; std::vector Reloads; @@ -1819,10 +1766,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // const DataLayout &DL = M->getDataLayout(); - // TOOD: Pass AllocaBlock - // BasicBlock * AllocaBlock = &codeReplacer->getParent()->front(); - - + // Emit the call to the function CallInst * call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? 
"targetBlock" : "",codeReplacer); @@ -1850,7 +1794,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // SmallVector AfterCall; - + ValueToValueMapTy VMap; // Reload the outputs passed in by reference. From b72ec472106ad31f0b5c6e580b7b573c5ae00215 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 18:49:42 -0600 Subject: [PATCH 030/130] Inlined extractCodeRegionByCopy --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 634 ++++++++++++++++++++ 1 file changed, 634 insertions(+) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index cbce57ca86d05..1c5a8bc12e068 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1669,6 +1669,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { +#if 0 extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq, ExitWeights, ExitBlocks, SinkingCands, HoistingCands, CommonExit, oldFunction, newFunction,header, codeReplacer, nullptr, newRootNode, params, StructValues, @@ -1677,6 +1678,639 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, StructArgTy, Struct, {} ); +#endif + + + + + auto newHeader = codeReplacer; + ValueToValueMapTy VMap; + Module* M = oldFunction->getParent(); + auto KeepOldBlocks = true; + + + + + + + // TODO: Make StructTy a field + StructType* StructTy = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + //StructTy = StructType::get(M->getContext(), paramTy); + StructTy = cast(newFunction->getArg(0)->getType()); + } + + + + // Create an iterator to name all of the arguments we inserted. + Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. 
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value* RewriteVal; + if (AggregateArgs) { + Value* Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); + Instruction* TI = newFunction->begin()->getTerminator(); + GetElementPtrInst* GEP = GetElementPtrInst::Create( + StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, + "loadgep_" + inputs[i]->getName(), TI); + } + else + RewriteVal = &*AI++; + + if (KeepOldBlocks) { + auto In = inputs[i]; + VMap[In] = RewriteVal; + } + else { + std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); + for (User* use : Users) + if (Instruction* inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } + } +#if 0 + // Set names for input and output arguments. + if (!AggregateArgs) { + AI = newFunction->arg_begin(); + for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) + AI->setName(inputs[i]->getName()); + for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) + AI->setName(outputs[i]->getName() + ".out"); + } +#endif + + if (false) header->getParent()->viewCFG(); + + if (!KeepOldBlocks) { + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector Users(header->user_begin(), header->user_end()); + for (auto& U : Users) // FIXME: KeepOldBlocks? 
+ // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (Instruction* I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction && + !Blocks.count(I->getParent())) + I->replaceUsesOfWith(header, newHeader); + } + + //return newFunction; + + BasicBlock* AllocaBlock = BasicBlock::Create(header->getContext(), "entry", newFunction, newRootNode); + auto BranchI = BranchInst::Create(newRootNode, AllocaBlock); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); + + // Recursive calls to oldFunction still call the old Function from extracted function. + + VMap[oldFunction] = oldFunction; + + +#if 0 + CallInst* TheCall = emitCallAndSwitchStatement(newFunction, newRootNode, inputs, outputs, true, VMap); + /* + CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, + BasicBlock *codeReplacer, + ValueSet &inputs, + ValueSet &outputs, bool KeepOldBlocks, ValueToValueMapTy &VMap) { + */ +#else + // Emit a call to the new function, passing in: *pointer to struct (if + // aggregating parameters), or plan inputs and allocated memory for outputs + // std::vector ReloadOutputs, Reloads; + + // Module *M = newFunction->getParent(); + LLVMContext& Context = M->getContext(); + // const DataLayout& DL = M->getDataLayout(); + CallInst* call = nullptr; + +#if 0 + BasicBlock* AllocaBlock; + if (KeepOldBlocks) { + AllocaBlock = &newFunction->front(); + } + else { + AllocaBlock = &codeReplacer->getParent()->front(); + } +#endif + +#if 0 + auto NewAlloca = [&](Type* Ty, unsigned AddrSpace, Value* ArraySize, + const Twine& Name) { + if (!KeepOldBlocks) + return new AllocaInst(Ty, AddrSpace, ArraySize, Name, &codeReplacer->getParent()->front().front()); + return new AllocaInst(Ty, AddrSpace, ArraySize, Name, &newFunction->front().front()); + }; +#endif + + +#if 0 + // Create allocas for the outputs + for (Value* output : outputs) { + if (AggregateArgs) { + 
StructValues.push_back(output); + } + else { + AllocaInst* alloca = + // NewAlloca(output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc"); +#if 1 + new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), + nullptr, output->getName() + ".loc", + &codeReplacer->getParent()->front().front()); +#endif + ReloadOutputs.push_back(alloca); + params.push_back(alloca); + } + } +#endif + +#if 0 + StructType* StructArgTy = nullptr; + AllocaInst* Struct = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + std::vector ArgTypes; + for (Value* V : StructValues) + ArgTypes.push_back(V->getType()); + + // Allocate a struct at the beginning of this function + StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); + // Struct = NewAlloca(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg"); +#if 1 + Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", + &codeReplacer->getParent()->front().front()); +#endif + params.push_back(Struct); + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value* Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst* GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + new StoreInst(StructValues[i], GEP, codeReplacer); + } + } +#endif + + // Emit the call to the function + call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); + + + + // Add debug location to the new call, if the original function has debug + // info. In that case, the terminator of the entry block of the extracted + // function contains the first debug location of the extracted function, + // set in extractCodeRegion. 
+ if (codeReplacer->getParent()->getSubprogram()) { + if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) + call->setDebugLoc(DL); + } + codeReplacer->getInstList().push_back(call); + + // Set swifterror parameter attributes. + for (unsigned SwiftErrArgNo : SwiftErrorArgs) { + call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); + newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); + } + + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); + + DenseMap ReloadReplacements; + SmallVector ReloadRepls; + DenseMap ReloadAddress; + // DenseMap SpillAddress; + +#if 1 + // Reload the outputs passed in by reference. + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value* Output = nullptr; + if (AggregateArgs) { + Value* Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst* GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } + else { + Output = ReloadOutputs[i]; + } + ReloadAddress[outputs[i]] = Output; + + // new StoreInst(outputs[i]->getType(), Output, ); + + // SpillAddress[outputs[i]] = new AllocaInst (outputs[i]->getType(), 0, outputs[i]->getName() + ".addr",&codeReplacer->getParent()->front().front()); + + + LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); + Reloads.push_back(load); + //ReloadReplacements[outputs[i]] + + if (KeepOldBlocks) { + auto OrigOut = outputs[i]; + //VMap[Out] = load; + ReloadReplacements[OrigOut] = load; + ReloadRepls.push_back(load); + + // Remove all PHIs; will need to be recreated by SSAUpdater; + } else { + std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (unsigned u 
= 0, e = Users.size(); u != e; ++u) { + Instruction* inst = cast(Users[u]); + if (!Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); + } + } + } +#endif + + // Now we can emit a switch statement using the call as a value. + SwitchInst* TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + + + // auto newFuncIt = newFunction->front().getIterator(); + for (BasicBlock* Block : Blocks) { + BasicBlock* CBB = CloneBasicBlock(Block, VMap, {}, newFunction /*, nullptr, &DIFinder*/); + + // Add basic block mapping. + VMap[Block] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (Block->hasAddressTaken()) { + Constant* OldBBAddr = BlockAddress::get(oldFunction, Block); + VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); + } + + // Note return instructions for the caller. + // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) + // Returns.push_back(RI); + + + for (auto&& P : CBB->phis()) { + auto NumIncoming = P.getNumIncomingValues(); + for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { + if (Blocks.count(P.getIncomingBlock(Idx))) + continue; + P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/ false); + } + } + } + + + + + // Since there may be multiple exits from the original region, make the new + // function return an unsigned, switch on that number. This loop iterates + // over all of the blocks in the extracted region, updating any terminator + // instructions in the to-be-extracted region that branch to blocks that are + // not in the region to be extracted. 
+ std::map ExitBlockMap; + + // Iterate over the previously collected targets, and create new blocks inside + // the function to branch to. + unsigned switchVal = 0; + for (BasicBlock* OldTarget : OldTargets) { + if (Blocks.count(OldTarget)) + continue; + BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; + if (NewTarget) { + // llvm_unreachable("Happens if e.g. switch has multiple edges to target"); + continue; + } + + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", + newFunction); + VMap[OldTarget] = NewTarget; + unsigned SuccNum = switchVal++; + + Value* brVal = nullptr; + assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); + switch (NumExitBlocks) { + case 0: + case 1: break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } + + ReturnInst::Create(Context, brVal, NewTarget); + + // auto OldPredecessor = OldTarget->getUniquePredecessor(); + +#if 0 + if (KeepOldBlocks) { + for (auto&& P : OldTarget->phis()) { + auto Val = P.getIncomingValueForBlock(OldTarget); + Value *PHINewVal = Val; + if (auto X = ReloadReplacements.lookup(Val)) + PHINewVal = X; + P.addIncoming(PHINewVal, codeReplacer); + } + } +#endif + + // Update the switch instruction. 
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), + OldTarget); + + + + + +#if 0 + if (KeepOldBlocks) { + // for (auto T : OldTargets) { + DenseMap OutRepl; + for (auto&& P : OldTarget->phis()) { + int NumIncoming = P.getNumIncomingValues(); + for (int i = 0; i < NumIncoming; ++i) { + auto OldVal = P.getIncomingValue(i); + auto ReplVal = ReloadReplacements.lookup(OldVal); + if (ReplVal) { + P.addIncoming(ReplVal, codeReplacer); + OutRepl[OldVal] = &P; + break; + } + } + } + + + SmallPtrSet OriginalPreds; + for (auto Pred : predecessors(OldTarget)) { + if (Blocks.count(Pred)) continue; + if (Pred == codeReplacer)continue; + OriginalPreds.insert(Pred); + } + + if (OriginalPreds.size() == 1) { + auto OldPredecessor = *OriginalPreds.begin(); + for (auto&& O : outputs) { + auto& PHI = OutRepl[O]; + if (!PHI) { + auto ReplVal = ReloadReplacements.lookup(O); + PHI = PHINode::Create(O->getType(), 2, O->getName() + ".merge_new_and_old", OldTarget->getFirstNonPHI()); + PHI->addIncoming(O, OldPredecessor); + PHI->addIncoming(ReplVal, codeReplacer); + } + + + +#if 0 + for (auto&& U : make_early_inc_range(O->uses())) { + auto* User = dyn_cast(U.getUser()); + if (!User) continue; + //if (!DT->dominates(OldTarget, User->getParent())) continue; + if (VMap.lookup(User)) continue; + if (Blocks.count(User->getParent())) continue; + if (User->getParent()->getParent() != oldFunction) continue; + // if (User->getParent() == OldTarget && isa(User)) continue; + if (auto P = dyn_cast(User)) { + auto Incoming = P->getIncomingBlock(U.getOperandNo()); + if (Incoming == codeReplacer || Blocks.count(Incoming)) continue; + } + + if (!PHI) { + auto ReplVal = ReloadReplacements.lookup(O); + PHI = PHINode::Create(O->getType(), 2, O->getName() + ".merge_new_and_old", OldTarget->getFirstNonPHI()); + PHI->addIncoming(O, OldPredecessor); + PHI->addIncoming(ReplVal, codeReplacer); + } + + U.set(PHI); + } +#endif + } + } + } +#endif + } + + + // for (auto&& O : outputs) { } + + 
+ + + //if (!KeepOldBlocks) +#if 1 + for (BasicBlock *Block : Blocks) { + Instruction *TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; + BasicBlock *OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock *NewTarget = ExitBlockMap[OldTarget]; + assert(NewTarget && "Unknown target block!"); + + // rewrite the original branch instruction with this new target + // TI->setSuccessor(i, NewTarget); + VMap[OldTarget] = NewTarget; + } + } +#endif + + + + // Now that we've done the deed, simplify the switch instruction. + Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy->isVoidTy()) { + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + } else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. + ReturnInst::Create(Context, + Constant::getNullValue(OldFnRetTy), TheSwitch); + } + + TheSwitch->eraseFromParent(); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->eraseFromParent(); + break; + case 2: + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->eraseFromParent(); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. 
+ TheSwitch->setCondition(call); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); + break; + } + + // Insert lifetime markers around the reloads of any output values. The + // allocas output values are stored in are only in-use in the codeRepl block. + // insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); + + // auto TheCall =call; +#endif + + // Function *oldFunc =oldFunction; + // Function::BasicBlockListType &oldBlocks = oldFunction->getBasicBlockList(); + // Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); + +#if 0 + DebugInfoFinder DIFinder; + assert((newFunction->getParent() == nullptr || + newFunction->getParent() == oldFunc->getParent()) && + "Expected NewFunc to have the same parent, or no parent"); + if (DISubprogram * SPClonedWithinModule = oldFunc->getSubprogram()) + DIFinder.processSubprogram(SPClonedWithinModule); +#endif + + + + + for (auto Pred : predecessors(header)) { + if (VMap.count(Pred)) + continue; + VMap[Pred] = newRootNode; + } + + + // Loop over all of the instructions in the new function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (BasicBlock* Block : Blocks) { + WeakTrackingVH NewBlock = VMap.lookup(Block); + if (!NewBlock) { + continue; + } + BasicBlock &Y = cast (*NewBlock); + + // Loop over all instructions, fixing each one as we find it... 
+ for (Instruction& II : Y) + RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); + } + + + + + // Must be done after remap + SSAUpdater SSA; + for (auto P : enumerate(outputs)) { + auto OutIdx = P.index(); + auto OldVal = cast( P.value()); + auto NewVal = Reloads[OutIdx]; + + SSA.Initialize(OldVal->getType(), (OldVal->getName() + ".merge_with_extracted").str()); + SSA.AddAvailableValue(codeReplacer, NewVal); + + // Could help SSAUpdater by determining in advance which output values are available in which exit blocks (from DT). + SSA.AddAvailableValue(OldVal->getParent(), OldVal); + + for (auto &&U : make_early_inc_range(OldVal->uses())) { + auto User = dyn_cast(U.getUser()); + if (!User) continue; + auto EffectiveUser = User->getParent(); + if (auto &&P = dyn_cast(User)) { + EffectiveUser= P->getIncomingBlock(U); + } + + if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) continue; + + + SSA.RewriteUseAfterInsertions(U); + } + } + + + + + + // Store the arguments right after the definition of output value. + // This should be proceeded after creating exit stubs to be ensure that invoke + // result restore will be placed in the outlined function. + Function::arg_iterator OAI = OutputArgBegin; + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto *OutI = dyn_cast(outputs[i]); + if (!OutI) + continue; + OutI = cast(VMap.lookup(OutI)); + + // Find proper insertion point. + BasicBlock::iterator InsertPt; + // In case OutI is an invoke, we insert the store at the beginning in the + // 'normal destination' BB. Otherwise we insert the store right after OutI. 
+ if (auto *InvokeI = dyn_cast(OutI)) + InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); + else if (auto *Phi = dyn_cast(OutI)) + InsertPt = Phi->getParent()->getFirstInsertionPt(); + else + InsertPt = std::next(OutI->getIterator()); + + Instruction *InsertBefore = &*InsertPt; + assert((InsertBefore->getFunction() == newFunction || + Blocks.count(InsertBefore->getParent())) && + "InsertPt should be in new function"); + assert(OAI != newFunction->arg_end() && + "Number of output arguments should match " + "the amount of defined values"); + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), + InsertBefore); + new StoreInst(OutI, GEP, InsertBefore); + // Since there should be only one struct argument aggregating + // all the output values, we shouldn't increment OAI, which always + // points to the struct argument, in this case. 
+ } else { + new StoreInst(OutI, &*OAI, InsertBefore); + ++OAI; + } + } + + + BasicBlock* HeaderCopy = cast( VMap.lookup(header)); + assert(HeaderCopy); + auto *BranchI2 = BranchInst::Create(HeaderCopy, newRootNode); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); } else { // Transforms/HotColdSplit/stale-assume-in-original-func.ll // TODO: remove assumes only after moving From e9c6c69cef85716d1680953eab1fa33a34cd7a20 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 18:51:40 -0600 Subject: [PATCH 031/130] WIP --- .../llvm/Transforms/Utils/CodeExtractor.h | 17 - llvm/lib/Transforms/Utils/CodeExtractor.cpp | 677 +----------------- 2 files changed, 13 insertions(+), 681 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 6be332813e6a8..3ac193200c307 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -261,23 +261,6 @@ class CodeExtractorAnalysisCache { void canonicalizeForExtraction(BasicBlock *&Header, bool NoExitBlockPHIs); - void extractCodeRegionByCopy(const CodeExtractorAnalysisCache &CEAC, - ValueSet &Inputs, ValueSet &Outputs, - const BlockFrequency& EntryFreq, - const DenseMap &ExitWeights, const SmallPtrSet &ExitBlocks, - const ValueSet &SinkingCands,const ValueSet & HoistingCands, BasicBlock *CommonExit, - Function *oldFunction, Function *newFunction, BasicBlock *header, - BasicBlock * codeReplacer, - BasicBlock * NewEntry, - BasicBlock * newRootNode , - std::vector ¶ms, - std::vector &StructValues, - SmallVectorImpl &SwiftErrorArgs, - std::vector & ReloadOutputs,std::vector & Reloads, - StructType *StructArgTy , - AllocaInst *Struct , - function_ref MakeReloadAddress - ); void moveCodeToFunction(Function *newFunction); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 1c5a8bc12e068..aace32e051755 100644 --- 
a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1469,6 +1469,17 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + + + + + + + + + + + // Construct new function based on inputs/outputs & add allocas for all defs. Function *newFunction = constructFunctionDeclaration(inputs, outputs, header); @@ -1483,7 +1494,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // This takes place of the original loop BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); - auto newHeader = codeReplacer; + //auto newHeader = codeReplacer; @@ -1664,22 +1675,11 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (Instruction* I = dyn_cast(U)) if (I->isTerminator() && I->getFunction() == oldFunction && !Blocks.count(I->getParent())) - I->replaceUsesOfWith(header, newHeader); + I->replaceUsesOfWith(header, codeReplacer); if (KeepOldBlocks) { -#if 0 - extractCodeRegionByCopy(CEAC, inputs, outputs, EntryFreq, ExitWeights, ExitBlocks, SinkingCands, HoistingCands, CommonExit, oldFunction, newFunction,header, codeReplacer, nullptr, newRootNode, - params, - StructValues, - SwiftErrorArgs,ReloadOutputs, - Reloads, - StructArgTy, Struct, - {} - ); -#endif - @@ -2700,657 +2700,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } -void CodeExtractor::extractCodeRegionByCopy(const CodeExtractorAnalysisCache& CEAC, ValueSet& inputs, ValueSet& outputs, const BlockFrequency& EntryFreq, - const DenseMap& ExitWeights, - const SmallPtrSet& ExitBlocks, - const ValueSet& SinkingCands, const ValueSet& HoistingCands, BasicBlock* CommonExit, - Function* oldFunction, Function* newFunction, BasicBlock* header, - BasicBlock* codeReplacer, - BasicBlock* NewEntry, BasicBlock* newRootNode, - std::vector ¶ms, - std::vector &StructValues, - SmallVectorImpl &SwiftErrorArgs, - std::vector & ReloadOutputs,std::vector & 
Reloads, - StructType *StructArgTy , - AllocaInst *Struct , - function_ref MakeReloadAddress -) { - // Assumption: this is a single-entry code region, and the header is the first block in the region. - // BasicBlock *header = *Blocks.begin(); - - - - - - auto newHeader = codeReplacer; - ValueToValueMapTy VMap; - Module* M = oldFunction->getParent(); - auto KeepOldBlocks = true; - - - - - - - // TODO: Make StructTy a field - StructType* StructTy = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - //StructTy = StructType::get(M->getContext(), paramTy); - StructTy = cast(newFunction->getArg(0)->getType()); - } - - - - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); - - // Rewrite all users of the inputs in the extracted region to use the - // arguments (or appropriate addressing into struct) instead. - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value* RewriteVal; - if (AggregateArgs) { - Value* Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); - Instruction* TI = newFunction->begin()->getTerminator(); - GetElementPtrInst* GEP = GetElementPtrInst::Create( - StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, - "loadgep_" + inputs[i]->getName(), TI); - } - else - RewriteVal = &*AI++; - - if (KeepOldBlocks) { - auto In = inputs[i]; - VMap[In] = RewriteVal; - } - else { - std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User* use : Users) - if (Instruction* inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } - } -#if 0 - // Set names for input and output arguments. 
- if (!AggregateArgs) { - AI = newFunction->arg_begin(); - for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) - AI->setName(inputs[i]->getName()); - for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) - AI->setName(outputs[i]->getName() + ".out"); - } -#endif - - if (false) header->getParent()->viewCFG(); - - if (!KeepOldBlocks) { - // Rewrite branches to basic blocks outside of the loop to new dummy blocks - // within the new function. This must be done before we lose track of which - // blocks were originally in the code region. - std::vector Users(header->user_begin(), header->user_end()); - for (auto& U : Users) // FIXME: KeepOldBlocks? - // The BasicBlock which contains the branch is not in the region - // modify the branch target to a new block - if (Instruction* I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction && - !Blocks.count(I->getParent())) - I->replaceUsesOfWith(header, newHeader); - } - - //return newFunction; - - BasicBlock* AllocaBlock = BasicBlock::Create(header->getContext(), "entry", newFunction, newRootNode); - auto BranchI = BranchInst::Create(newRootNode, AllocaBlock); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); - - // Recursive calls to oldFunction still call the old Function from extracted function. 
- - VMap[oldFunction] = oldFunction; - - -#if 0 - CallInst* TheCall = emitCallAndSwitchStatement(newFunction, newRootNode, inputs, outputs, true, VMap); - /* - CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, - BasicBlock *codeReplacer, - ValueSet &inputs, - ValueSet &outputs, bool KeepOldBlocks, ValueToValueMapTy &VMap) { - */ -#else - // Emit a call to the new function, passing in: *pointer to struct (if - // aggregating parameters), or plan inputs and allocated memory for outputs - // std::vector ReloadOutputs, Reloads; - - // Module *M = newFunction->getParent(); - LLVMContext& Context = M->getContext(); - // const DataLayout& DL = M->getDataLayout(); - CallInst* call = nullptr; - -#if 0 - BasicBlock* AllocaBlock; - if (KeepOldBlocks) { - AllocaBlock = &newFunction->front(); - } - else { - AllocaBlock = &codeReplacer->getParent()->front(); - } -#endif - -#if 0 - auto NewAlloca = [&](Type* Ty, unsigned AddrSpace, Value* ArraySize, - const Twine& Name) { - if (!KeepOldBlocks) - return new AllocaInst(Ty, AddrSpace, ArraySize, Name, &codeReplacer->getParent()->front().front()); - return new AllocaInst(Ty, AddrSpace, ArraySize, Name, &newFunction->front().front()); - }; -#endif - - -#if 0 - // Create allocas for the outputs - for (Value* output : outputs) { - if (AggregateArgs) { - StructValues.push_back(output); - } - else { - AllocaInst* alloca = - // NewAlloca(output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc"); -#if 1 - new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), - nullptr, output->getName() + ".loc", - &codeReplacer->getParent()->front().front()); -#endif - ReloadOutputs.push_back(alloca); - params.push_back(alloca); - } - } -#endif - -#if 0 - StructType* StructArgTy = nullptr; - AllocaInst* Struct = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - std::vector ArgTypes; - for (Value* V : StructValues) - ArgTypes.push_back(V->getType()); - - // Allocate a struct at the 
beginning of this function - StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); - // Struct = NewAlloca(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg"); -#if 1 - Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", - &codeReplacer->getParent()->front().front()); -#endif - params.push_back(Struct); - - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value* Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst* GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); - } - } -#endif - - // Emit the call to the function - call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); - - - - // Add debug location to the new call, if the original function has debug - // info. In that case, the terminator of the entry block of the extracted - // function contains the first debug location of the extracted function, - // set in extractCodeRegion. - if (codeReplacer->getParent()->getSubprogram()) { - if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) - call->setDebugLoc(DL); - } - codeReplacer->getInstList().push_back(call); - - // Set swifterror parameter attributes. - for (unsigned SwiftErrArgNo : SwiftErrorArgs) { - call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - } - - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); - - DenseMap ReloadReplacements; - SmallVector ReloadRepls; - DenseMap ReloadAddress; - // DenseMap SpillAddress; - -#if 1 - // Reload the outputs passed in by reference. 
- for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value* Output = nullptr; - if (AggregateArgs) { - Value* Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst* GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } - else { - Output = ReloadOutputs[i]; - } - ReloadAddress[outputs[i]] = Output; - - // new StoreInst(outputs[i]->getType(), Output, ); - - // SpillAddress[outputs[i]] = new AllocaInst (outputs[i]->getType(), 0, outputs[i]->getName() + ".addr",&codeReplacer->getParent()->front().front()); - - - LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); - Reloads.push_back(load); - //ReloadReplacements[outputs[i]] - - if (KeepOldBlocks) { - auto OrigOut = outputs[i]; - //VMap[Out] = load; - ReloadReplacements[OrigOut] = load; - ReloadRepls.push_back(load); - - // Remove all PHIs; will need to be recreated by SSAUpdater; - } else { - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction* inst = cast(Users[u]); - if (!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); - } - } - } -#endif - - // Now we can emit a switch statement using the call as a value. - SwitchInst* TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); - - - // auto newFuncIt = newFunction->front().getIterator(); - for (BasicBlock* Block : Blocks) { - BasicBlock* CBB = CloneBasicBlock(Block, VMap, {}, newFunction /*, nullptr, &DIFinder*/); - - // Add basic block mapping. - VMap[Block] = CBB; - - // It is only legal to clone a function if a block address within that - // function is never referenced outside of the function. 
Given that, we - // want to map block addresses from the old function to block addresses in - // the clone. (This is different from the generic ValueMapper - // implementation, which generates an invalid blockaddress when - // cloning a function.) - if (Block->hasAddressTaken()) { - Constant* OldBBAddr = BlockAddress::get(oldFunction, Block); - VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); - } - - // Note return instructions for the caller. - // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) - // Returns.push_back(RI); - - - for (auto&& P : CBB->phis()) { - auto NumIncoming = P.getNumIncomingValues(); - for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { - if (Blocks.count(P.getIncomingBlock(Idx))) - continue; - P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/ false); - } - } - } - - - - - // Since there may be multiple exits from the original region, make the new - // function return an unsigned, switch on that number. This loop iterates - // over all of the blocks in the extracted region, updating any terminator - // instructions in the to-be-extracted region that branch to blocks that are - // not in the region to be extracted. - std::map ExitBlockMap; - - // Iterate over the previously collected targets, and create new blocks inside - // the function to branch to. - unsigned switchVal = 0; - for (BasicBlock* OldTarget : OldTargets) { - if (Blocks.count(OldTarget)) - continue; - BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) { - // llvm_unreachable("Happens if e.g. switch has multiple edges to target"); - continue; - } - - // If we don't already have an exit stub for this non-extracted - // destination, create one now! 
- NewTarget = BasicBlock::Create(Context, - OldTarget->getName() + ".exitStub", - newFunction); - VMap[OldTarget] = NewTarget; - unsigned SuccNum = switchVal++; - - Value* brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: break; // No value needed. - case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); - break; - } - - ReturnInst::Create(Context, brVal, NewTarget); - - // auto OldPredecessor = OldTarget->getUniquePredecessor(); - -#if 0 - if (KeepOldBlocks) { - for (auto&& P : OldTarget->phis()) { - auto Val = P.getIncomingValueForBlock(OldTarget); - Value *PHINewVal = Val; - if (auto X = ReloadReplacements.lookup(Val)) - PHINewVal = X; - P.addIncoming(PHINewVal, codeReplacer); - } - } -#endif - - // Update the switch instruction. - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), - SuccNum), - OldTarget); - - - - - -#if 0 - if (KeepOldBlocks) { - // for (auto T : OldTargets) { - DenseMap OutRepl; - for (auto&& P : OldTarget->phis()) { - int NumIncoming = P.getNumIncomingValues(); - for (int i = 0; i < NumIncoming; ++i) { - auto OldVal = P.getIncomingValue(i); - auto ReplVal = ReloadReplacements.lookup(OldVal); - if (ReplVal) { - P.addIncoming(ReplVal, codeReplacer); - OutRepl[OldVal] = &P; - break; - } - } - } - - - SmallPtrSet OriginalPreds; - for (auto Pred : predecessors(OldTarget)) { - if (Blocks.count(Pred)) continue; - if (Pred == codeReplacer)continue; - OriginalPreds.insert(Pred); - } - - if (OriginalPreds.size() == 1) { - auto OldPredecessor = *OriginalPreds.begin(); - for (auto&& O : outputs) { - auto& PHI = OutRepl[O]; - if (!PHI) { - auto ReplVal = ReloadReplacements.lookup(O); - PHI = PHINode::Create(O->getType(), 2, O->getName() + ".merge_new_and_old", OldTarget->getFirstNonPHI()); - PHI->addIncoming(O, 
OldPredecessor); - PHI->addIncoming(ReplVal, codeReplacer); - } - - - -#if 0 - for (auto&& U : make_early_inc_range(O->uses())) { - auto* User = dyn_cast(U.getUser()); - if (!User) continue; - //if (!DT->dominates(OldTarget, User->getParent())) continue; - if (VMap.lookup(User)) continue; - if (Blocks.count(User->getParent())) continue; - if (User->getParent()->getParent() != oldFunction) continue; - // if (User->getParent() == OldTarget && isa(User)) continue; - if (auto P = dyn_cast(User)) { - auto Incoming = P->getIncomingBlock(U.getOperandNo()); - if (Incoming == codeReplacer || Blocks.count(Incoming)) continue; - } - - if (!PHI) { - auto ReplVal = ReloadReplacements.lookup(O); - PHI = PHINode::Create(O->getType(), 2, O->getName() + ".merge_new_and_old", OldTarget->getFirstNonPHI()); - PHI->addIncoming(O, OldPredecessor); - PHI->addIncoming(ReplVal, codeReplacer); - } - - U.set(PHI); - } -#endif - } - } - } -#endif - } - - - // for (auto&& O : outputs) { } - - - - - //if (!KeepOldBlocks) -#if 1 - for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock *OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock *NewTarget = ExitBlockMap[OldTarget]; - assert(NewTarget && "Unknown target block!"); - - // rewrite the original branch instruction with this new target - // TI->setSuccessor(i, NewTarget); - VMap[OldTarget] = NewTarget; - } - } -#endif - - - - // Now that we've done the deed, simplify the switch instruction. 
- Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); - switch (NumExitBlocks) { - case 0: - // There are no successors (the block containing the switch itself), which - // means that previously this was the last part of the function, and hence - // this should be rewritten as a `ret' - - // Check if the function should return a value - if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, nullptr, TheSwitch); // Return void - } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { - // return what we have - ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); - } else { - // Otherwise we must have code extracted an unwind or something, just - // return whatever we want. - ReturnInst::Create(Context, - Constant::getNullValue(OldFnRetTy), TheSwitch); - } - - TheSwitch->eraseFromParent(); - break; - case 1: - // Only a single destination, change the switch into an unconditional - // branch. - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); - TheSwitch->eraseFromParent(); - break; - case 2: - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), - call, TheSwitch); - TheSwitch->eraseFromParent(); - break; - default: - // Otherwise, make the default destination of the switch instruction be one - // of the other successors. - TheSwitch->setCondition(call); - TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); - // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); - break; - } - - // Insert lifetime markers around the reloads of any output values. The - // allocas output values are stored in are only in-use in the codeRepl block. 
- // insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); - - // auto TheCall =call; -#endif - - // Function *oldFunc =oldFunction; - // Function::BasicBlockListType &oldBlocks = oldFunction->getBasicBlockList(); - // Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); - -#if 0 - DebugInfoFinder DIFinder; - assert((newFunction->getParent() == nullptr || - newFunction->getParent() == oldFunc->getParent()) && - "Expected NewFunc to have the same parent, or no parent"); - if (DISubprogram * SPClonedWithinModule = oldFunc->getSubprogram()) - DIFinder.processSubprogram(SPClonedWithinModule); -#endif - - - - - for (auto Pred : predecessors(header)) { - if (VMap.count(Pred)) - continue; - VMap[Pred] = newRootNode; - } - - - // Loop over all of the instructions in the new function, fixing up operand - // references as we go. This uses VMap to do all the hard work. - for (BasicBlock* Block : Blocks) { - WeakTrackingVH NewBlock = VMap.lookup(Block); - if (!NewBlock) { - continue; - } - BasicBlock &Y = cast (*NewBlock); - - // Loop over all instructions, fixing each one as we find it... - for (Instruction& II : Y) - RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); - } - - - - - // Must be done after remap - SSAUpdater SSA; - for (auto P : enumerate(outputs)) { - auto OutIdx = P.index(); - auto OldVal = cast( P.value()); - auto NewVal = Reloads[OutIdx]; - - SSA.Initialize(OldVal->getType(), (OldVal->getName() + ".merge_with_extracted").str()); - SSA.AddAvailableValue(codeReplacer, NewVal); - - // Could help SSAUpdater by determining in advance which output values are available in which exit blocks (from DT). 
- SSA.AddAvailableValue(OldVal->getParent(), OldVal); - - for (auto &&U : make_early_inc_range(OldVal->uses())) { - auto User = dyn_cast(U.getUser()); - if (!User) continue; - auto EffectiveUser = User->getParent(); - if (auto &&P = dyn_cast(User)) { - EffectiveUser= P->getIncomingBlock(U); - } - - if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) continue; - - - SSA.RewriteUseAfterInsertions(U); - } - } - - - - - - // Store the arguments right after the definition of output value. - // This should be proceeded after creating exit stubs to be ensure that invoke - // result restore will be placed in the outlined function. - Function::arg_iterator OAI = OutputArgBegin; - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto *OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; - OutI = cast(VMap.lookup(OutI)); - - // Find proper insertion point. - BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. Otherwise we insert the store right after OutI. 
- if (auto *InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); - - Instruction *InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || - Blocks.count(InsertBefore->getParent())) && - "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), - InsertBefore); - new StoreInst(OutI, GEP, InsertBefore); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. 
- } else { - new StoreInst(OutI, &*OAI, InsertBefore); - ++OAI; - } - } - - - BasicBlock* HeaderCopy = cast( VMap.lookup(header)); - assert(HeaderCopy); - auto *BranchI2 = BranchInst::Create(HeaderCopy, newRootNode); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); -} From b6b95a6152f15bfb612fc20d79357e9a5b8c0323 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 19:00:38 -0600 Subject: [PATCH 032/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 84 ++++++++++----------- 1 file changed, 38 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index aace32e051755..cac95d182338e 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1411,18 +1411,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // block in the region. BasicBlock *header = *Blocks.begin(); Function *oldFunction = header->getParent(); + Module* M = oldFunction->getParent(); + const DataLayout& DL = M->getDataLayout(); - // SmallPtrSet ExitBlocks; - - // analyzeBeforeExtraction(CEAC,inputs, outputs, EntryFreq,ExitWeights,ExitBlocks); - - - // canonicalization canonicalizeForExtraction(header, KeepOldBlocks); @@ -1485,18 +1481,17 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + + //// CodeGen newFunction implementation /////////////////////////////////////////////////// + // The new function needs a root node because other nodes can branch to the // head of the region, but the entry node of a function cannot have preds. 
BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); - BasicBlock *newRootNode=newFuncRoot; - - // This takes place of the original loop - BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); - //auto newHeader = codeReplacer; + @@ -1507,43 +1502,32 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - + // Create an iterator to name all of the arguments we inserted. Function::arg_iterator AI = newFunction->arg_begin(); // Rewrite all users of the inputs in the extracted region to use the // arguments (or appropriate addressing into struct) instead. SmallVector NewValues; - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal; + Value* RewriteVal; if (AggregateArgs) { - Value *Idx[2]; + Value* Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); - Instruction *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = GetElementPtrInst::Create(StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); + Instruction* TI = newFunction->begin()->getTerminator(); + GetElementPtrInst* GEP = GetElementPtrInst::Create(StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, "loadgep_" + inputs[i]->getName(), TI); - } else + } + else RewriteVal = &*AI++; NewValues.push_back(RewriteVal); } + -#if 0 - // Set names for input and output arguments. 
- if (!AggregateArgs) { - AI = newFunction->arg_begin(); - for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) - AI->setName(inputs[i]->getName()); - for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) - AI->setName(outputs[i]->getName()+".out"); - } -#endif - - std::vector ReloadOutputs; - std::vector Reloads; + //// Codegen newFunction call ////////////////////////////////////////////// // Add inputs as params, or to be filled into the struct unsigned ArgNo = 0; @@ -1561,10 +1545,21 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ++ArgNo; } - - BasicBlock * AllocaBlock = &codeReplacer->getParent()->front(); - Module* M = oldFunction->getParent(); - const DataLayout& DL = M->getDataLayout(); + + //////////////////////////////////////////////////////////////////////////// + + // This takes place of the original loop + BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); + + + + + // BasicBlock * AllocaBlock = &codeReplacer->getParent()->front(); + BasicBlock * AllocaBlock = &oldFunction->front(); + + + std::vector ReloadOutputs; + std::vector Reloads; // Create allocas for the outputs for (Value *output : outputs) { @@ -1680,13 +1675,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { - - - - auto newHeader = codeReplacer; + // auto newHeader = codeReplacer; ValueToValueMapTy VMap; - Module* M = oldFunction->getParent(); - auto KeepOldBlocks = true; + // Module* M = oldFunction->getParent(); + // auto KeepOldBlocks = true; @@ -1758,13 +1750,13 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (Instruction* I = dyn_cast(U)) if (I->isTerminator() && I->getFunction() == oldFunction && !Blocks.count(I->getParent())) - I->replaceUsesOfWith(header, newHeader); + I->replaceUsesOfWith(header, codeReplacer); } //return newFunction; - BasicBlock* AllocaBlock = BasicBlock::Create(header->getContext(), "entry", 
newFunction, newRootNode); - auto BranchI = BranchInst::Create(newRootNode, AllocaBlock); + BasicBlock* AllocaBlock = BasicBlock::Create(header->getContext(), "entry", newFunction, newFuncRoot); + auto BranchI = BranchInst::Create(newFuncRoot, AllocaBlock); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); // Recursive calls to oldFunction still call the old Function from extracted function. @@ -2208,7 +2200,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, for (auto Pred : predecessors(header)) { if (VMap.count(Pred)) continue; - VMap[Pred] = newRootNode; + VMap[Pred] = newFuncRoot; } @@ -2309,7 +2301,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BasicBlock* HeaderCopy = cast( VMap.lookup(header)); assert(HeaderCopy); - auto *BranchI2 = BranchInst::Create(HeaderCopy, newRootNode); + auto *BranchI2 = BranchInst::Create(HeaderCopy, newFuncRoot); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); } else { // Transforms/HotColdSplit/stale-assume-in-original-func.ll From a13a7e96fa2ce1d75291c1e3f08dca20ed983e56 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 19:14:00 -0600 Subject: [PATCH 033/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index cac95d182338e..b1a752359e6c9 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1527,7 +1527,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - //// Codegen newFunction call ////////////////////////////////////////////// + //// Codegen newFunction call replacement ////////////////////////////////////////////// // Add inputs as params, or to be filled into the struct unsigned ArgNo = 0; @@ -1546,12 +1546,14 @@ CodeExtractor::extractCodeRegion(const 
CodeExtractorAnalysisCache &CEAC, } - //////////////////////////////////////////////////////////////////////////// - // This takes place of the original loop BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); + //////////////////////////////////////////////////////////////////////////// + + + // BasicBlock * AllocaBlock = &codeReplacer->getParent()->front(); From b9bf259d21492bce5d9b3e8911d929470e390dc2 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 19:17:43 -0600 Subject: [PATCH 034/130] Moving arguments --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 68 +++++++++++---------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index b1a752359e6c9..270b1790bbc0e 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1412,6 +1412,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BasicBlock *header = *Blocks.begin(); Function *oldFunction = header->getParent(); Module* M = oldFunction->getParent(); + LLVMContext &Context = M->getContext(); const DataLayout& DL = M->getDataLayout(); @@ -1419,6 +1420,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + canonicalizeForExtraction(header, KeepOldBlocks); @@ -1529,59 +1531,63 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, //// Codegen newFunction call replacement ////////////////////////////////////////////// + // This takes place of the original loop + BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); + BasicBlock * AllocaBlock = &oldFunction->front(); + // Add inputs as params, or to be filled into the struct unsigned ArgNo = 0; - std::vector params; std::vector StructValues; - SmallVector SwiftErrorArgs; - for (Value *input : inputs) { - if (AggregateArgs) - 
StructValues.push_back(input); - else { - params.push_back(input); - if (input->isSwiftError()) - SwiftErrorArgs.push_back(ArgNo); + if (AggregateArgs) { + for (Value* input : inputs) { + StructValues.push_back(input); + ++ArgNo; } - ++ArgNo; } - - // This takes place of the original loop - BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); - - - //////////////////////////////////////////////////////////////////////////// - - - - - - // BasicBlock * AllocaBlock = &codeReplacer->getParent()->front(); - BasicBlock * AllocaBlock = &oldFunction->front(); + std::vector params; + SmallVector SwiftErrorArgs; + if (!AggregateArgs) { + for (Value* input : inputs) { + params.push_back(input); + if (input->isSwiftError()) + SwiftErrorArgs.push_back(ArgNo); + ++ArgNo; + } + } std::vector ReloadOutputs; std::vector Reloads; // Create allocas for the outputs - for (Value *output : outputs) { - if (AggregateArgs) { + if (AggregateArgs) { + for (Value* output : outputs) { StructValues.push_back(output); - } else { - AllocaInst *alloca = - // NewAlloca(output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc"); -#if 1 + } + } + if (!AggregateArgs) { + for (Value* output : outputs) { + AllocaInst* alloca = new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc", &AllocaBlock->front()); -#endif ReloadOutputs.push_back(alloca); params.push_back(alloca); } } - LLVMContext &Context = M->getContext(); + //////////////////////////////////////////////////////////////////////////// + + + + + + + + + StructType *StructArgTy = nullptr; From 2c8d3603c7bf76115a57a9efdf94113c20d1bc0b Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 19:21:31 -0600 Subject: [PATCH 035/130] Moving struct arguments --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 44 ++++++++------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git 
a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 270b1790bbc0e..736f674e5ba25 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1491,13 +1491,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); - - - - - - - StructType *StructTy = nullptr; if (AggregateArgs && newFunction->arg_size() > 0) StructTy = cast(newFunction->getArg(0)->getType()); @@ -1533,7 +1526,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // This takes place of the original loop BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); - BasicBlock * AllocaBlock = &oldFunction->front(); + BasicBlock *AllocaBlock = &oldFunction->front(); // Add inputs as params, or to be filled into the struct unsigned ArgNo = 0; @@ -1577,34 +1570,18 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } - - //////////////////////////////////////////////////////////////////////////// - - - - - - - - - - - - StructType *StructArgTy = nullptr; + StructType *StructArgTy = StructTy; AllocaInst *Struct = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { std::vector ArgTypes; for (Value *V : StructValues) ArgTypes.push_back(V->getType()); - // Allocate a struct at the beginning of this function - StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); - // Struct = NewAlloca(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg"); -#if 1 + Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg", &AllocaBlock->front()); -#endif + params.push_back(Struct); for (unsigned i = 0, e = inputs.size(); i != e; ++i) { @@ -1617,6 +1594,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } + + + 
//////////////////////////////////////////////////////////////////////////// + + + + + + + + + + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); unsigned FirstOut = inputs.size(); if (!AggregateArgs) From 2c2c233762582abaa25e504d39fb2968c6d96dbe Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 19:29:28 -0600 Subject: [PATCH 036/130] Moving more arguments --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 115 +++++++------------- 1 file changed, 38 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 736f674e5ba25..d055683433bc9 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1492,7 +1492,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, StructType *StructTy = nullptr; - if (AggregateArgs && newFunction->arg_size() > 0) + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) StructTy = cast(newFunction->getArg(0)->getType()); @@ -1528,17 +1528,39 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); BasicBlock *AllocaBlock = &oldFunction->front(); + + // Add inputs as params, or to be filled into the struct unsigned ArgNo = 0; - std::vector StructValues; - if (AggregateArgs) { + std::vector params; + + AllocaInst *Struct = nullptr; + if (AggregateArgs && StructTy) { + std::vector StructValues; for (Value* input : inputs) { StructValues.push_back(input); ++ArgNo; } + + + + Struct = new AllocaInst(StructTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", + &AllocaBlock->front()); + + params.push_back(Struct); + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst *GEP = 
GetElementPtrInst::Create(StructTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + new StoreInst(StructValues[i], GEP, codeReplacer); + } } - std::vector params; + SmallVector SwiftErrorArgs; if (!AggregateArgs) { for (Value* input : inputs) { @@ -1550,15 +1572,13 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } + + + std::vector ReloadOutputs; std::vector Reloads; // Create allocas for the outputs - if (AggregateArgs) { - for (Value* output : outputs) { - StructValues.push_back(output); - } - } if (!AggregateArgs) { for (Value* output : outputs) { AllocaInst* alloca = @@ -1570,31 +1590,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } - StructType *StructArgTy = StructTy; - AllocaInst *Struct = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - std::vector ArgTypes; - for (Value *V : StructValues) - ArgTypes.push_back(V->getType()); - Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", - &AllocaBlock->front()); - - params.push_back(Struct); - - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); - } - } + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); //////////////////////////////////////////////////////////////////////////// @@ -1607,48 +1610,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); 
- if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); - -#if 0 - //using InsertPointTy = IRBuilder<>::InsertPoint; - // IRBuilder<> Builder(Context); - auto MakeReloadAddress = [&](int i) { - Value *Output = nullptr; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - //Value *GEP = Builder.CreateGEP(StructArgTy, Struct, Idx, Twine("gep_reload_") + outputs[i]->getName()); - GetElementPtrInst *GEP = GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } else { - Output = ReloadOutputs[i]; - } - return Output; - }; -#endif - -#if 0 - // Undo SSA for output values after the extracted region before dominator analysis is invalidated. - if (KeepOldBlocks) { - for (auto P : enumerate(outputs)) { - auto Idx = P.index(); - auto OutVal = P.value(); - - for (auto &&E : ExitBlocks) { - Builder.SetInsertPoint(E->getTerminator()); - auto Attr = MakeReloadAddress(Idx); - Builder.CreateStore(OutVal, ); - } - } - } -#endif - - // Update the entry count of the function. 
if (BFI) { auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); @@ -1890,7 +1851,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); GetElementPtrInst* GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; } @@ -2102,7 +2063,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } - // for (auto&& O : outputs) { } + @@ -2284,7 +2245,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), + StructTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertBefore); new StoreInst(OutI, GEP, InsertBefore); // Since there should be only one struct argument aggregating @@ -2430,7 +2391,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); //Value *GEP = Builder.CreateGEP(StructArgTy, Struct, Idx, Twine("gep_reload_") + outputs[i]->getName()); - GetElementPtrInst *GEP = GetElementPtrInst::Create(StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + GetElementPtrInst *GEP = GetElementPtrInst::Create(StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; } else { @@ -2557,7 +2518,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = 
ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), + StructTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertBefore); new StoreInst(outputs[i], GEP, InsertBefore); // Since there should be only one struct argument aggregating From bd187ae8f930869df5218ce39aceddaa0b3f8827 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 19:42:02 -0600 Subject: [PATCH 037/130] Refactor call parameter codegen --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 95 ++++++--------------- 1 file changed, 26 insertions(+), 69 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index d055683433bc9..1c0f838f6b0da 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1501,6 +1501,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Create an iterator to name all of the arguments we inserted. Function::arg_iterator AI = newFunction->arg_begin(); + // Rewrite all users of the inputs in the extracted region to use the // arguments (or appropriate addressing into struct) instead. SmallVector NewValues; @@ -1529,6 +1530,18 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BasicBlock *AllocaBlock = &oldFunction->front(); + // Update the entry count of the function. 
+ if (BFI) { + auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (Count.hasValue()) + newFunction->setEntryCount(ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME + BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); + } + + + + + // Add inputs as params, or to be filled into the struct unsigned ArgNo = 0; @@ -1562,6 +1575,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, SmallVector SwiftErrorArgs; + std::vector ReloadOutputs; + std::vector Reloads; if (!AggregateArgs) { for (Value* input : inputs) { params.push_back(input); @@ -1569,17 +1584,11 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, SwiftErrorArgs.push_back(ArgNo); ++ArgNo; } - } - - - + - std::vector ReloadOutputs; - std::vector Reloads; // Create allocas for the outputs - if (!AggregateArgs) { for (Value* output : outputs) { AllocaInst* alloca = new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), @@ -1592,32 +1601,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); unsigned FirstOut = inputs.size(); if (!AggregateArgs) std::advance(OutputArgBegin, inputs.size()); - //////////////////////////////////////////////////////////////////////////// - - - + //////////////////////////////////////////////////////////////////////////// - // Update the entry count of the function. - if (BFI) { - auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (Count.hasValue()) - newFunction->setEntryCount( - ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME - BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); - } // Rewrite branches to basic blocks outside of the loop to new dummy blocks // within the new function. 
This must be done before we lose track of which @@ -1633,22 +1629,13 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - if (KeepOldBlocks) { - // auto newHeader = codeReplacer; - ValueToValueMapTy VMap; - // Module* M = oldFunction->getParent(); - // auto KeepOldBlocks = true; - - - + if (KeepOldBlocks) { + ValueToValueMapTy VMap; - // TODO: Make StructTy a field - StructType* StructTy = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - //StructTy = StructType::get(M->getContext(), paramTy); - StructTy = cast(newFunction->getArg(0)->getType()); + for (auto&& P : enumerate(inputs)) { + VMap[P.value()] = NewValues[P.index()]; } @@ -1656,6 +1643,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Create an iterator to name all of the arguments we inserted. Function::arg_iterator AI = newFunction->arg_begin(); +#if 0 // Rewrite all users of the inputs in the extracted region to use the // arguments (or appropriate addressing into struct) instead. for (unsigned i = 0, e = inputs.size(); i != e; ++i) { @@ -1685,34 +1673,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, inst->replaceUsesOfWith(inputs[i], RewriteVal); } } -#if 0 - // Set names for input and output arguments. - if (!AggregateArgs) { - AI = newFunction->arg_begin(); - for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) - AI->setName(inputs[i]->getName()); - for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) - AI->setName(outputs[i]->getName() + ".out"); - } #endif - if (false) header->getParent()->viewCFG(); - - if (!KeepOldBlocks) { - // Rewrite branches to basic blocks outside of the loop to new dummy blocks - // within the new function. This must be done before we lose track of which - // blocks were originally in the code region. - std::vector Users(header->user_begin(), header->user_end()); - for (auto& U : Users) // FIXME: KeepOldBlocks? 
- // The BasicBlock which contains the branch is not in the region - // modify the branch target to a new block - if (Instruction* I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction && - !Blocks.count(I->getParent())) - I->replaceUsesOfWith(header, codeReplacer); - } + - //return newFunction; BasicBlock* AllocaBlock = BasicBlock::Create(header->getContext(), "entry", newFunction, newFuncRoot); auto BranchI = BranchInst::Create(newFuncRoot, AllocaBlock); @@ -1723,15 +1687,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, VMap[oldFunction] = oldFunction; -#if 0 - CallInst* TheCall = emitCallAndSwitchStatement(newFunction, newRootNode, inputs, outputs, true, VMap); - /* - CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, - BasicBlock *codeReplacer, - ValueSet &inputs, - ValueSet &outputs, bool KeepOldBlocks, ValueToValueMapTy &VMap) { - */ -#else + + // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs // std::vector ReloadOutputs, Reloads; @@ -2138,7 +2095,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); // auto TheCall =call; -#endif + // Function *oldFunc =oldFunction; // Function::BasicBlockListType &oldBlocks = oldFunction->getBasicBlockList(); From 02adf99de7cd57ea2bb9fa62f1b8815f82790ace Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 19:48:00 -0600 Subject: [PATCH 038/130] Remove unnecessary entry block --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 45 +++---------------- .../llvm-extract/extract-block-cleanup.ll | 3 -- .../extract-block-multiple-exits.ll | 3 -- 3 files changed, 5 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 1c0f838f6b0da..665e177adcc35 100644 --- 
a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1610,7 +1610,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - //////////////////////////////////////////////////////////////////////////// + //// Connect call replacement to CFG //////////////////////////////////////////////////////////////////////// @@ -1640,47 +1640,12 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); - -#if 0 - // Rewrite all users of the inputs in the extracted region to use the - // arguments (or appropriate addressing into struct) instead. - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value* RewriteVal; - if (AggregateArgs) { - Value* Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); - Instruction* TI = newFunction->begin()->getTerminator(); - GetElementPtrInst* GEP = GetElementPtrInst::Create( - StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, - "loadgep_" + inputs[i]->getName(), TI); - } - else - RewriteVal = &*AI++; - - if (KeepOldBlocks) { - auto In = inputs[i]; - VMap[In] = RewriteVal; - } - else { - std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User* use : Users) - if (Instruction* inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } - } -#endif - - + - BasicBlock* AllocaBlock = BasicBlock::Create(header->getContext(), "entry", newFunction, newFuncRoot); - auto BranchI = BranchInst::Create(newFuncRoot, AllocaBlock); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); + // BasicBlock* AllocaBlock = BasicBlock::Create(header->getContext(), "entry", newFunction, newFuncRoot); + 
//auto BranchI = BranchInst::Create(newFuncRoot, AllocaBlock); + // applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); // Recursive calls to oldFunction still call the old Function from extracted function. diff --git a/llvm/test/tools/llvm-extract/extract-block-cleanup.ll b/llvm/test/tools/llvm-extract/extract-block-cleanup.ll index 971a48b67d579..bbf656fe696f6 100644 --- a/llvm/test/tools/llvm-extract/extract-block-cleanup.ll +++ b/llvm/test/tools/llvm-extract/extract-block-cleanup.ll @@ -48,9 +48,6 @@ ; CHECK-LABEL: define internal i1 @foo.region_start(i32* %arg) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %newFuncRoot -; CHECK-EMPTY: ; CHECK-NEXT: newFuncRoot: ; CHECK-NEXT: br label %region_start ; CHECK-EMPTY: diff --git a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll index 25540200a15b1..f40f48eefb0b0 100644 --- a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll +++ b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll @@ -88,9 +88,6 @@ ; CHECK-LABEL: define internal i16 @func.region_start(i1 %c1, i1 %c2, i8 %dest, i32* %a.out, i32* %b.out, i32* %c.out, i32* %B.ce.out) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %newFuncRoot -; CHECK-EMPTY: ; CHECK-NEXT: newFuncRoot: ; CHECK-NEXT: br label %region_start ; CHECK-EMPTY: From e94ad8f930fb7460a46fc4003a2f85d7ddbfda4d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 19:50:39 -0600 Subject: [PATCH 039/130] Remove dead code --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 207 +------------------- 1 file changed, 7 insertions(+), 200 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 665e177adcc35..4c74061613e1b 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1634,107 +1634,13 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache 
&CEAC, if (KeepOldBlocks) { ValueToValueMapTy VMap; + for (auto&& P : enumerate(inputs)) { VMap[P.value()] = NewValues[P.index()]; } - - - - - // BasicBlock* AllocaBlock = BasicBlock::Create(header->getContext(), "entry", newFunction, newFuncRoot); - //auto BranchI = BranchInst::Create(newFuncRoot, AllocaBlock); - // applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); - - // Recursive calls to oldFunction still call the old Function from extracted function. - - VMap[oldFunction] = oldFunction; - - - - - // Emit a call to the new function, passing in: *pointer to struct (if - // aggregating parameters), or plan inputs and allocated memory for outputs - // std::vector ReloadOutputs, Reloads; - - // Module *M = newFunction->getParent(); - LLVMContext& Context = M->getContext(); - // const DataLayout& DL = M->getDataLayout(); - CallInst* call = nullptr; - -#if 0 - BasicBlock* AllocaBlock; - if (KeepOldBlocks) { - AllocaBlock = &newFunction->front(); - } - else { - AllocaBlock = &codeReplacer->getParent()->front(); - } -#endif - -#if 0 - auto NewAlloca = [&](Type* Ty, unsigned AddrSpace, Value* ArraySize, - const Twine& Name) { - if (!KeepOldBlocks) - return new AllocaInst(Ty, AddrSpace, ArraySize, Name, &codeReplacer->getParent()->front().front()); - return new AllocaInst(Ty, AddrSpace, ArraySize, Name, &newFunction->front().front()); - }; -#endif - - -#if 0 - // Create allocas for the outputs - for (Value* output : outputs) { - if (AggregateArgs) { - StructValues.push_back(output); - } - else { - AllocaInst* alloca = - // NewAlloca(output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc"); -#if 1 - new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), - nullptr, output->getName() + ".loc", - &codeReplacer->getParent()->front().front()); -#endif - ReloadOutputs.push_back(alloca); - params.push_back(alloca); - } - } -#endif - -#if 0 - StructType* StructArgTy = nullptr; - AllocaInst* Struct = nullptr; - if (AggregateArgs && 
(inputs.size() + outputs.size() > 0)) { - std::vector ArgTypes; - for (Value* V : StructValues) - ArgTypes.push_back(V->getType()); - - // Allocate a struct at the beginning of this function - StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); - // Struct = NewAlloca(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg"); -#if 1 - Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", - &codeReplacer->getParent()->front().front()); -#endif - params.push_back(Struct); - - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value* Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst* GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); - } - } -#endif - - // Emit the call to the function - call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); + CallInst* call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? 
"targetBlock" : ""); @@ -1782,14 +1688,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } ReloadAddress[outputs[i]] = Output; - // new StoreInst(outputs[i]->getType(), Output, ); - - // SpillAddress[outputs[i]] = new AllocaInst (outputs[i]->getType(), 0, outputs[i]->getName() + ".addr",&codeReplacer->getParent()->front().front()); - - + LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); Reloads.push_back(load); - //ReloadReplacements[outputs[i]] + if (KeepOldBlocks) { auto OrigOut = outputs[i]; @@ -1895,17 +1797,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // auto OldPredecessor = OldTarget->getUniquePredecessor(); -#if 0 - if (KeepOldBlocks) { - for (auto&& P : OldTarget->phis()) { - auto Val = P.getIncomingValueForBlock(OldTarget); - Value *PHINewVal = Val; - if (auto X = ReloadReplacements.lookup(Val)) - PHINewVal = X; - P.addIncoming(PHINewVal, codeReplacer); - } - } -#endif + // Update the switch instruction. 
TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), @@ -1916,72 +1808,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, -#if 0 - if (KeepOldBlocks) { - // for (auto T : OldTargets) { - DenseMap OutRepl; - for (auto&& P : OldTarget->phis()) { - int NumIncoming = P.getNumIncomingValues(); - for (int i = 0; i < NumIncoming; ++i) { - auto OldVal = P.getIncomingValue(i); - auto ReplVal = ReloadReplacements.lookup(OldVal); - if (ReplVal) { - P.addIncoming(ReplVal, codeReplacer); - OutRepl[OldVal] = &P; - break; - } - } - } - - - SmallPtrSet OriginalPreds; - for (auto Pred : predecessors(OldTarget)) { - if (Blocks.count(Pred)) continue; - if (Pred == codeReplacer)continue; - OriginalPreds.insert(Pred); - } - - if (OriginalPreds.size() == 1) { - auto OldPredecessor = *OriginalPreds.begin(); - for (auto&& O : outputs) { - auto& PHI = OutRepl[O]; - if (!PHI) { - auto ReplVal = ReloadReplacements.lookup(O); - PHI = PHINode::Create(O->getType(), 2, O->getName() + ".merge_new_and_old", OldTarget->getFirstNonPHI()); - PHI->addIncoming(O, OldPredecessor); - PHI->addIncoming(ReplVal, codeReplacer); - } - - -#if 0 - for (auto&& U : make_early_inc_range(O->uses())) { - auto* User = dyn_cast(U.getUser()); - if (!User) continue; - //if (!DT->dominates(OldTarget, User->getParent())) continue; - if (VMap.lookup(User)) continue; - if (Blocks.count(User->getParent())) continue; - if (User->getParent()->getParent() != oldFunction) continue; - // if (User->getParent() == OldTarget && isa(User)) continue; - if (auto P = dyn_cast(User)) { - auto Incoming = P->getIncomingBlock(U.getOperandNo()); - if (Incoming == codeReplacer || Blocks.count(Incoming)) continue; - } - - if (!PHI) { - auto ReplVal = ReloadReplacements.lookup(O); - PHI = PHINode::Create(O->getType(), 2, O->getName() + ".merge_new_and_old", OldTarget->getFirstNonPHI()); - PHI->addIncoming(O, OldPredecessor); - PHI->addIncoming(ReplVal, codeReplacer); - } - - U.set(PHI); - } -#endif - } - } - 
} -#endif } @@ -1990,8 +1817,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - //if (!KeepOldBlocks) -#if 1 + for (BasicBlock *Block : Blocks) { Instruction *TI = Block->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { @@ -2007,7 +1833,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, VMap[OldTarget] = NewTarget; } } -#endif + @@ -2055,25 +1881,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, break; } - // Insert lifetime markers around the reloads of any output values. The - // allocas output values are stored in are only in-use in the codeRepl block. - // insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); - - // auto TheCall =call; - - - // Function *oldFunc =oldFunction; - // Function::BasicBlockListType &oldBlocks = oldFunction->getBasicBlockList(); - // Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); - -#if 0 - DebugInfoFinder DIFinder; - assert((newFunction->getParent() == nullptr || - newFunction->getParent() == oldFunc->getParent()) && - "Expected NewFunc to have the same parent, or no parent"); - if (DISubprogram * SPClonedWithinModule = oldFunc->getSubprogram()) - DIFinder.processSubprogram(SPClonedWithinModule); -#endif From 29c5769b14479493eca540a23172acd4645a155d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 19:54:28 -0600 Subject: [PATCH 040/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 4c74061613e1b..70a6bd49cb37c 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -2067,14 +2067,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, -#if 0 - CallInst* TheCall = 
emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs, false, VMap, - params, - StructValues, - SwiftErrorArgs,ReloadOutputs,Reloads, - StructArgTy, Struct - ); -#else + Module *M = newFunction->getParent(); LLVMContext &Context = M->getContext(); // const DataLayout &DL = M->getDataLayout(); @@ -2307,8 +2300,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // allocas output values are stored in are only in-use in the codeRepl block. insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); - CallInst* TheCall =call; -#endif + // CallInst* TheCall =call; + moveCodeToFunction(newFunction); @@ -2317,7 +2310,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Replicate the effects of any lifetime start/end markers which referenced // input objects in the extraction region by placing markers around the call. insertLifetimeMarkersSurroundingCall( - oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall); + oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, call); // TODO: ByCopy @@ -2354,7 +2347,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } - fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall); + fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *call); } // Mark the new function `noreturn` if applicable. 
Terminators which resume From 6b0860c73ae8a1a35990317cc93e13ea04ccaf19 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Dec 2021 22:33:04 -0600 Subject: [PATCH 041/130] Lifetime markers --- .../llvm/Transforms/Utils/CodeExtractor.h | 2 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 91 ++++++++++-------- .../tools/llvm-extract/extract-block-sink.ll | 93 +++++++++++++++++++ 3 files changed, 148 insertions(+), 38 deletions(-) create mode 100644 llvm/test/tools/llvm-extract/extract-block-sink.ll diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 3ac193200c307..2c80627bad288 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -258,7 +258,7 @@ class CodeExtractorAnalysisCache { - void canonicalizeForExtraction(BasicBlock *&Header, bool NoExitBlockPHIs); + void canonicalizeCFGForExtraction(BasicBlock *&Header, bool NoExitBlockPHIs); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 70a6bd49cb37c..a14d1e41de1d6 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1320,7 +1320,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, bool Ke -void CodeExtractor::canonicalizeForExtraction(BasicBlock *&Header,bool NoExitBlockPHIs) { +void CodeExtractor::canonicalizeCFGForExtraction(BasicBlock *&Header,bool NoExitBlockPHIs) { // BasicBlock *header = *Blocks.begin(); // Function *oldFunction = header->getParent(); @@ -1365,6 +1365,8 @@ void CodeExtractor::canonicalizeForExtraction(BasicBlock *&Header,bool NoExitBlo recomputeExitBlocks(); } + + } @@ -1421,7 +1423,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - canonicalizeForExtraction(header, KeepOldBlocks); + canonicalizeCFGForExtraction(header, KeepOldBlocks); + @@ -1473,11 +1476,6 @@ 
CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - - - - - // Construct new function based on inputs/outputs & add allocas for all defs. Function *newFunction = constructFunctionDeclaration(inputs, outputs, header); @@ -1491,6 +1489,42 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); + + + + // Now sink all instructions which only have non-phi uses inside the region. + // Group the allocas at the start of the block, so that any bitcast uses of + // the allocas are well-defined. + AllocaInst* FirstSunkAlloca = nullptr; + for (auto* II : SinkingCands) { + if (auto* AI = dyn_cast(II)) { + AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); + if (!FirstSunkAlloca) + FirstSunkAlloca = AI; + } + } + assert((SinkingCands.empty() || FirstSunkAlloca) && "Did not expect a sink candidate without any allocas"); + for (auto* II : SinkingCands) { + if (!isa(II)) { + cast(II)->moveAfter(FirstSunkAlloca); + } + } + + + + if (!HoistingCands.empty()) { + auto* HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + Instruction* TI = HoistToBlock->getTerminator(); + for (auto* II : HoistingCands) + cast(II)->moveBefore(TI); + } + + + + + + + StructType *StructTy = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) StructTy = cast(newFunction->getArg(0)->getType()); @@ -1573,6 +1607,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } + + SmallVector SwiftErrorArgs; std::vector ReloadOutputs; @@ -1634,11 +1670,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { ValueToValueMapTy VMap; - for (auto&& P : enumerate(inputs)) { VMap[P.value()] = NewValues[P.index()]; } + for (auto &&S : SinkingCands) { + VMap[S] = S; + } + CallInst* call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? 
"targetBlock" : ""); @@ -1670,7 +1709,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, DenseMap ReloadAddress; // DenseMap SpillAddress; -#if 1 + // Reload the outputs passed in by reference. for (unsigned i = 0, e = outputs.size(); i != e; ++i) { Value* Output = nullptr; @@ -1709,7 +1748,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } } -#endif + // Now we can emit a switch statement using the call as a value. SwitchInst* TheSwitch = @@ -2007,37 +2046,15 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } - auto* BranchI = BranchInst::Create(header, newFuncRoot); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); - // newFuncRoot->getInstList().push_back(BranchI); + - // TODO: ByCopy - // Now sink all instructions which only have non-phi uses inside the region. - // Group the allocas at the start of the block, so that any bitcast uses of - // the allocas are well-defined. - AllocaInst* FirstSunkAlloca = nullptr; - for (auto* II : SinkingCands) { - if (auto* AI = dyn_cast(II)) { - AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); - if (!FirstSunkAlloca) - FirstSunkAlloca = AI; - } - } - assert((SinkingCands.empty() || FirstSunkAlloca) && "Did not expect a sink candidate without any allocas"); - for (auto* II : SinkingCands) { - if (!isa(II)) { - cast(II)->moveAfter(FirstSunkAlloca); - } - } + + + auto* BranchI = BranchInst::Create(header, newFuncRoot); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); - if (!HoistingCands.empty()) { - auto* HoistToBlock = findOrCreateBlockForHoisting(CommonExit); - Instruction* TI = HoistToBlock->getTerminator(); - for (auto* II : HoistingCands) - cast(II)->moveBefore(TI); - } // TODO: ByCopy diff --git a/llvm/test/tools/llvm-extract/extract-block-sink.ll b/llvm/test/tools/llvm-extract/extract-block-sink.ll new file mode 100644 index 0000000000000..71105f3fb599f --- /dev/null +++ 
b/llvm/test/tools/llvm-extract/extract-block-sink.ll @@ -0,0 +1,93 @@ +; RUN: llvm-extract -S -bb "foo:region_start" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s + + + +; CHECK-LABEL: define void @foo(i1 %c) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %a = alloca i32, align 4 +; CHECK-NEXT: %b = alloca i32, align 4 +; CHECK-NEXT: %A = alloca i32, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %A) +; CHECK-NEXT: br i1 %c, label %codeRepl, label %outsideonly +; CHECK-EMPTY: +; CHECK-NEXT: outsideonly: +; CHECK-NEXT: store i32 41, i32* %b, align 4 +; CHECK-NEXT: store i32 42, i32* %A, align 4 +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: codeRepl: +; CHECK-NEXT: call void @foo.region_start(i32* %a, i32* %b, i32* %A) +; CHECK-NEXT: br label %region_start.split +; CHECK-EMPTY: +; CHECK-NEXT: region_start: +; CHECK-NEXT: store i32 43, i32* %a, align 4 +; CHECK-NEXT: store i32 44, i32* %b, align 4 +; CHECK-NEXT: store i32 45, i32* %A, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) +; CHECK-NEXT: store i32 46, i32* %B, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) +; CHECK-NEXT: br label %region_start.split +; CHECK-EMPTY: +; CHECK-NEXT: region_start.split: +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: return: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %A) +; CHECK-NEXT: ret void +; CHECK-NEXT: } + + +; CHECK-LABEL: define internal void @foo.region_start(i32* %a, i32* %b, i32* %A) { +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: %B = alloca i32, align 4 +; CHECK-NEXT: br label %region_start +; CHECK-EMPTY: +; CHECK-NEXT: region_start: +; CHECK-NEXT: store i32 43, i32* %a, align 4 +; CHECK-NEXT: store i32 44, i32* %b, align 4 +; CHECK-NEXT: store i32 45, i32* %A, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) +; CHECK-NEXT: store i32 46, i32* %B, align 4 +; CHECK-NEXT: 
call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) +; CHECK-NEXT: br label %region_start.split.exitStub +; CHECK-EMPTY: +; CHECK-NEXT: region_start.split.exitStub: +; CHECK-NEXT: ret void +; CHECK-NEXT: } + + + + + + + +declare void @llvm.lifetime.start.p0i32(i64, i32* nocapture) +declare void @llvm.lifetime.end.p0i32(i64, i32* nocapture) + +define void @foo(i1 %c) { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %A = alloca i32, align 4 + %B = alloca i32, align 4 + call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %A) + br i1 %c, label %region_start, label %outsideonly + +outsideonly: + store i32 41, i32* %b + store i32 42, i32* %A + br label %return + +region_start: + store i32 43, i32* %a + store i32 44, i32* %b + store i32 45, i32* %A + call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) + store i32 46, i32* %B + call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) + br label %return + +return: + call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %A) + ret void +} From 7c5d0212e774904182ed91c7363a8ea5a8bd7a11 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 5 Dec 2021 04:04:34 -0600 Subject: [PATCH 042/130] fix sinking; TODO: lifetime markers --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 155 ++++++++++-------- .../tools/llvm-extract/extract-block-sink.ll | 84 +++++----- 2 files changed, 134 insertions(+), 105 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index a14d1e41de1d6..8ebce0ecd32c5 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1452,8 +1452,20 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } - - + if (!KeepOldBlocks) { + // Transforms/HotColdSplit/stale-assume-in-original-func.ll + // Remove @llvm.assume calls that will be moved to the new function from the + // old function's assumption cache. 
+ for (BasicBlock* Block : Blocks) { + for (Instruction& I : llvm::make_early_inc_range(*Block)) { + if (auto* AI = dyn_cast(&I)) { + if (AC) + AC->unregisterAssumption(AI); + AI->eraseFromParent(); + } + } + } + } @@ -1488,35 +1500,55 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // head of the region, but the entry node of a function cannot have preds. BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); - + ValueToValueMapTy VMap; + SmallVector AdditionalRemap; + auto MoveOrCopyInst = [KeepOldBlocks](Instruction *I, BasicBlock *IB, BasicBlock:: iterator IP) -> Instruction * { + if (KeepOldBlocks) { + auto AI= I->clone(); + AI->setName(I->getName()); + IB->getInstList().insert( IP, AI); + return AI; + } + I->moveBefore(*IB, IP); + return I; + }; + // Now sink all instructions which only have non-phi uses inside the region. // Group the allocas at the start of the block, so that any bitcast uses of // the allocas are well-defined. 
- AllocaInst* FirstSunkAlloca = nullptr; + for (auto* II : SinkingCands) { - if (auto* AI = dyn_cast(II)) { - AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); - if (!FirstSunkAlloca) - FirstSunkAlloca = AI; + if (!isa(II)) { + auto New = MoveOrCopyInst(cast(II),newFuncRoot, newFuncRoot->getFirstInsertionPt()); + if (KeepOldBlocks) { + AdditionalRemap.push_back(New); + VMap[II] = New; + } } } - assert((SinkingCands.empty() || FirstSunkAlloca) && "Did not expect a sink candidate without any allocas"); for (auto* II : SinkingCands) { - if (!isa(II)) { - cast(II)->moveAfter(FirstSunkAlloca); + if (auto* AI = dyn_cast(II)) { + AI = cast( MoveOrCopyInst(AI,newFuncRoot, newFuncRoot->getFirstInsertionPt())); + if (KeepOldBlocks) { + AdditionalRemap.push_back(AI); + VMap[II] = AI; + } } } + // assert((SinkingCands.empty() || FirstSunkAlloca) && "Did not expect a sink candidate without any allocas"); if (!HoistingCands.empty()) { auto* HoistToBlock = findOrCreateBlockForHoisting(CommonExit); Instruction* TI = HoistToBlock->getTerminator(); - for (auto* II : HoistingCands) + for (auto* II : HoistingCands) { + // MoveOrCopyInst(cast(II), HoistToBlock, TI->getIterator()); cast(II)->moveBefore(TI); + } } @@ -1668,18 +1700,18 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { - ValueToValueMapTy VMap; + for (auto&& P : enumerate(inputs)) { VMap[P.value()] = NewValues[P.index()]; } - - for (auto &&S : SinkingCands) { +#if 0 + for (auto&& S : SinkingCands) { VMap[S] = S; } +#endif - - CallInst* call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); + CallInst* call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? 
"targetBlock" : ""); @@ -1727,8 +1759,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } ReloadAddress[outputs[i]] = Output; - - LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); + + LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); Reloads.push_back(load); @@ -1739,7 +1771,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ReloadRepls.push_back(load); // Remove all PHIs; will need to be recreated by SSAUpdater; - } else { + } + else { std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { Instruction* inst = cast(Users[u]); @@ -1842,29 +1875,23 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), SuccNum), OldTarget); - - - - - - } - - for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); + + for (BasicBlock* Block : Blocks) { + Instruction* TI = Block->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { if (Blocks.count(TI->getSuccessor(i))) continue; - BasicBlock *OldTarget = TI->getSuccessor(i); + BasicBlock* OldTarget = TI->getSuccessor(i); // add a new basic block which returns the appropriate value - BasicBlock *NewTarget = ExitBlockMap[OldTarget]; + BasicBlock* NewTarget = ExitBlockMap[OldTarget]; assert(NewTarget && "Unknown target block!"); // rewrite the original branch instruction with this new target @@ -1877,7 +1904,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Now that we've done the deed, simplify the switch instruction. 
- Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + Type* OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); switch (NumExitBlocks) { case 0: // There are no successors (the block containing the switch itself), which @@ -1887,10 +1914,12 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Check if the function should return a value if (OldFnRetTy->isVoidTy()) { ReturnInst::Create(Context, nullptr, TheSwitch); // Return void - } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + } + else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { // return what we have ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); - } else { + } + else { // Otherwise we must have code extracted an unwind or something, just // return whatever we want. ReturnInst::Create(Context, @@ -1916,7 +1945,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, TheSwitch->setCondition(call); TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks - 1)); break; } @@ -1931,6 +1960,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } + for (Instruction* II : AdditionalRemap) + RemapInstruction(II, VMap, RF_NoModuleLevelChanges); + // Loop over all of the instructions in the new function, fixing up operand // references as we go. This uses VMap to do all the hard work. for (BasicBlock* Block : Blocks) { @@ -1938,9 +1970,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (!NewBlock) { continue; } - BasicBlock &Y = cast (*NewBlock); + BasicBlock& Y = cast(*NewBlock); // Loop over all instructions, fixing each one as we find it... 
+ for (Instruction& II : Y) RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); } @@ -1952,7 +1985,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, SSAUpdater SSA; for (auto P : enumerate(outputs)) { auto OutIdx = P.index(); - auto OldVal = cast( P.value()); + auto OldVal = cast(P.value()); auto NewVal = Reloads[OutIdx]; SSA.Initialize(OldVal->getType(), (OldVal->getName() + ".merge_with_extracted").str()); @@ -1961,13 +1994,13 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Could help SSAUpdater by determining in advance which output values are available in which exit blocks (from DT). SSA.AddAvailableValue(OldVal->getParent(), OldVal); - for (auto &&U : make_early_inc_range(OldVal->uses())) { + for (auto&& U : make_early_inc_range(OldVal->uses())) { auto User = dyn_cast(U.getUser()); if (!User) continue; auto EffectiveUser = User->getParent(); - if (auto &&P = dyn_cast(User)) { - EffectiveUser= P->getIncomingBlock(U); - } + if (auto&& P = dyn_cast(User)) { + EffectiveUser = P->getIncomingBlock(U); + } if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) continue; @@ -1985,7 +2018,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // result restore will be placed in the outlined function. Function::arg_iterator OAI = OutputArgBegin; for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto *OutI = dyn_cast(outputs[i]); + auto* OutI = dyn_cast(outputs[i]); if (!OutI) continue; OutI = cast(VMap.lookup(OutI)); @@ -1994,14 +2027,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BasicBlock::iterator InsertPt; // In case OutI is an invoke, we insert the store at the beginning in the // 'normal destination' BB. Otherwise we insert the store right after OutI. 
- if (auto *InvokeI = dyn_cast(OutI)) + if (auto* InvokeI = dyn_cast(OutI)) InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast(OutI)) + else if (auto* Phi = dyn_cast(OutI)) InsertPt = Phi->getParent()->getFirstInsertionPt(); else InsertPt = std::next(OutI->getIterator()); - Instruction *InsertBefore = &*InsertPt; + Instruction* InsertBefore = &*InsertPt; assert((InsertBefore->getFunction() == newFunction || Blocks.count(InsertBefore->getParent())) && "InsertPt should be in new function"); @@ -2009,42 +2042,36 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, "Number of output arguments should match " "the amount of defined values"); if (AggregateArgs) { - Value *Idx[2]; + Value* Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( + GetElementPtrInst* GEP = GetElementPtrInst::Create( StructTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertBefore); new StoreInst(OutI, GEP, InsertBefore); // Since there should be only one struct argument aggregating // all the output values, we shouldn't increment OAI, which always // points to the struct argument, in this case. - } else { + } + else { new StoreInst(OutI, &*OAI, InsertBefore); ++OAI; } } - BasicBlock* HeaderCopy = cast( VMap.lookup(header)); + BasicBlock* HeaderCopy = cast(VMap.lookup(header)); assert(HeaderCopy); - auto *BranchI2 = BranchInst::Create(HeaderCopy, newFuncRoot); + auto* BranchI2 = BranchInst::Create(HeaderCopy, newFuncRoot); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); - } else { - // Transforms/HotColdSplit/stale-assume-in-original-func.ll - // TODO: remove assumes only after moving - // Remove @llvm.assume calls that will be moved to the new function from the - // old function's assumption cache. 
- for (BasicBlock* Block : Blocks) { - for (Instruction& I : llvm::make_early_inc_range(*Block)) { - if (auto* AI = dyn_cast(&I)) { - if (AC) - AC->unregisterAssumption(AI); - AI->eraseFromParent(); - } - } + + if (!oldFunction) { + newFunction->viewCFG(); } + } else { + + @@ -2057,7 +2084,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - // TODO: ByCopy + // Collect objects which are inputs to the extraction region and also // referenced by lifetime start markers within it. The effects of these // markers must be replicated in the calling function to prevent the stack diff --git a/llvm/test/tools/llvm-extract/extract-block-sink.ll b/llvm/test/tools/llvm-extract/extract-block-sink.ll index 71105f3fb599f..55b1aba77d184 100644 --- a/llvm/test/tools/llvm-extract/extract-block-sink.ll +++ b/llvm/test/tools/llvm-extract/extract-block-sink.ll @@ -3,57 +3,59 @@ ; CHECK-LABEL: define void @foo(i1 %c) { -; CHECK-NEXT: entry: -; CHECK-NEXT: %a = alloca i32, align 4 -; CHECK-NEXT: %b = alloca i32, align 4 -; CHECK-NEXT: %A = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %A) -; CHECK-NEXT: br i1 %c, label %codeRepl, label %outsideonly +; CHECK-NEXT: entry: +; CHECK-NEXT: %a = alloca i32, align 4 +; CHECK-NEXT: %b = alloca i32, align 4 +; CHECK-NEXT: %A = alloca i32, align 4 +; CHECK-NEXT: %B = alloca i32, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %A) +; CHECK-NEXT: br i1 %c, label %codeRepl, label %outsideonly ; CHECK-EMPTY: -; CHECK-NEXT: outsideonly: -; CHECK-NEXT: store i32 41, i32* %b, align 4 -; CHECK-NEXT: store i32 42, i32* %A, align 4 -; CHECK-NEXT: br label %return +; CHECK-NEXT: outsideonly: +; CHECK-NEXT: store i32 41, i32* %b, align 4 +; CHECK-NEXT: store i32 42, i32* %A, align 4 +; CHECK-NEXT: br label %return ; CHECK-EMPTY: -; CHECK-NEXT: codeRepl: -; CHECK-NEXT: call void @foo.region_start(i32* %a, i32* %b, i32* %A) -; CHECK-NEXT: br label 
%region_start.split +; CHECK-NEXT: codeRepl: +; CHECK-NEXT: call void @foo.region_start(i32* %a, i32* %b, i32* %A) +; CHECK-NEXT: br label %region_start.split ; CHECK-EMPTY: -; CHECK-NEXT: region_start: -; CHECK-NEXT: store i32 43, i32* %a, align 4 -; CHECK-NEXT: store i32 44, i32* %b, align 4 -; CHECK-NEXT: store i32 45, i32* %A, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) -; CHECK-NEXT: store i32 46, i32* %B, align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) -; CHECK-NEXT: br label %region_start.split +; CHECK-NEXT: region_start: +; CHECK-NEXT: store i32 43, i32* %a, align 4 +; CHECK-NEXT: store i32 44, i32* %b, align 4 +; CHECK-NEXT: store i32 45, i32* %A, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) +; CHECK-NEXT: store i32 46, i32* %B, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) +; CHECK-NEXT: br label %region_start.split ; CHECK-EMPTY: -; CHECK-NEXT: region_start.split: -; CHECK-NEXT: br label %return +; CHECK-NEXT: region_start.split: +; CHECK-NEXT: br label %return ; CHECK-EMPTY: -; CHECK-NEXT: return: -; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %A) -; CHECK-NEXT: ret void -; CHECK-NEXT: } +; CHECK-NEXT: return: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %A) +; CHECK-NEXT: ret void +; CHECK-NEXT: } ; CHECK-LABEL: define internal void @foo.region_start(i32* %a, i32* %b, i32* %A) { -; CHECK-NEXT: newFuncRoot: -; CHECK-NEXT: %B = alloca i32, align 4 -; CHECK-NEXT: br label %region_start +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: %B = alloca i32, align 4 +; CHECK-NEXT: br label %region_start ; CHECK-EMPTY: -; CHECK-NEXT: region_start: -; CHECK-NEXT: store i32 43, i32* %a, align 4 -; CHECK-NEXT: store i32 44, i32* %b, align 4 -; CHECK-NEXT: store i32 45, i32* %A, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) -; CHECK-NEXT: 
store i32 46, i32* %B, align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) -; CHECK-NEXT: br label %region_start.split.exitStub +; CHECK-NEXT: region_start: +; CHECK-NEXT: store i32 43, i32* %a, align 4 +; CHECK-NEXT: store i32 44, i32* %b, align 4 +; CHECK-NEXT: store i32 45, i32* %A, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) +; CHECK-NEXT: store i32 46, i32* %B, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) +; CHECK-NEXT: br label %region_start.split.exitStub ; CHECK-EMPTY: -; CHECK-NEXT: region_start.split.exitStub: -; CHECK-NEXT: ret void -; CHECK-NEXT: } +; CHECK-NEXT: region_start.split.exitStub: +; CHECK-NEXT: ret void +; CHECK-NEXT: } + From 82d5905c930c04a36393b826d423f0b93d6a8b04 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 5 Dec 2021 04:12:02 -0600 Subject: [PATCH 043/130] simpler sinking test case --- .../tools/llvm-extract/extract-block-sink.ll | 62 ++++-------- .../tools/llvm-extract/extract-block-sink2.ll | 95 +++++++++++++++++++ 2 files changed, 112 insertions(+), 45 deletions(-) create mode 100644 llvm/test/tools/llvm-extract/extract-block-sink2.ll diff --git a/llvm/test/tools/llvm-extract/extract-block-sink.ll b/llvm/test/tools/llvm-extract/extract-block-sink.ll index 55b1aba77d184..2bf743a718c07 100644 --- a/llvm/test/tools/llvm-extract/extract-block-sink.ll +++ b/llvm/test/tools/llvm-extract/extract-block-sink.ll @@ -2,57 +2,41 @@ -; CHECK-LABEL: define void @foo(i1 %c) { +; CHECK-LABEL: define void @foo() { ; CHECK-NEXT: entry: ; CHECK-NEXT: %a = alloca i32, align 4 ; CHECK-NEXT: %b = alloca i32, align 4 -; CHECK-NEXT: %A = alloca i32, align 4 -; CHECK-NEXT: %B = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %A) -; CHECK-NEXT: br i1 %c, label %codeRepl, label %outsideonly -; CHECK-EMPTY: -; CHECK-NEXT: outsideonly: -; CHECK-NEXT: store i32 41, i32* %b, align 4 -; CHECK-NEXT: store 
i32 42, i32* %A, align 4 -; CHECK-NEXT: br label %return +; CHECK-NEXT: br label %codeRepl ; CHECK-EMPTY: ; CHECK-NEXT: codeRepl: -; CHECK-NEXT: call void @foo.region_start(i32* %a, i32* %b, i32* %A) -; CHECK-NEXT: br label %region_start.split +; CHECK-NEXT: call void @foo.region_start(i32* %b) +; CHECK-NEXT: br label %return ; CHECK-EMPTY: ; CHECK-NEXT: region_start: +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %a) ; CHECK-NEXT: store i32 43, i32* %a, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %a) ; CHECK-NEXT: store i32 44, i32* %b, align 4 -; CHECK-NEXT: store i32 45, i32* %A, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) -; CHECK-NEXT: store i32 46, i32* %B, align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) -; CHECK-NEXT: br label %region_start.split -; CHECK-EMPTY: -; CHECK-NEXT: region_start.split: ; CHECK-NEXT: br label %return ; CHECK-EMPTY: ; CHECK-NEXT: return: -; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %A) ; CHECK-NEXT: ret void ; CHECK-NEXT: } -; CHECK-LABEL: define internal void @foo.region_start(i32* %a, i32* %b, i32* %A) { +; CHECK-LABEL: define internal void @foo.region_start(i32* %b) { ; CHECK-NEXT: newFuncRoot: -; CHECK-NEXT: %B = alloca i32, align 4 +; CHECK-NEXT: %a = alloca i32, align 4 ; CHECK-NEXT: br label %region_start ; CHECK-EMPTY: ; CHECK-NEXT: region_start: +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %a) ; CHECK-NEXT: store i32 43, i32* %a, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %a) ; CHECK-NEXT: store i32 44, i32* %b, align 4 -; CHECK-NEXT: store i32 45, i32* %A, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) -; CHECK-NEXT: store i32 46, i32* %B, align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) -; CHECK-NEXT: br label 
%region_start.split.exitStub +; CHECK-NEXT: br label %return.exitStub ; CHECK-EMPTY: -; CHECK-NEXT: region_start.split.exitStub: +; CHECK-NEXT: return.exitStub: ; CHECK-NEXT: ret void ; CHECK-NEXT: } @@ -61,35 +45,23 @@ - - declare void @llvm.lifetime.start.p0i32(i64, i32* nocapture) declare void @llvm.lifetime.end.p0i32(i64, i32* nocapture) -define void @foo(i1 %c) { + +define void @foo() { entry: %a = alloca i32, align 4 %b = alloca i32, align 4 - %A = alloca i32, align 4 - %B = alloca i32, align 4 - call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %A) - br i1 %c, label %region_start, label %outsideonly - -outsideonly: - store i32 41, i32* %b - store i32 42, i32* %A - br label %return + br label %region_start region_start: + call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %a) store i32 43, i32* %a + call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %a) store i32 44, i32* %b - store i32 45, i32* %A - call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) - store i32 46, i32* %B - call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) br label %return return: - call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %A) ret void } diff --git a/llvm/test/tools/llvm-extract/extract-block-sink2.ll b/llvm/test/tools/llvm-extract/extract-block-sink2.ll new file mode 100644 index 0000000000000..55b1aba77d184 --- /dev/null +++ b/llvm/test/tools/llvm-extract/extract-block-sink2.ll @@ -0,0 +1,95 @@ +; RUN: llvm-extract -S -bb "foo:region_start" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s + + + +; CHECK-LABEL: define void @foo(i1 %c) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %a = alloca i32, align 4 +; CHECK-NEXT: %b = alloca i32, align 4 +; CHECK-NEXT: %A = alloca i32, align 4 +; CHECK-NEXT: %B = alloca i32, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %A) +; CHECK-NEXT: br i1 %c, label %codeRepl, label %outsideonly +; CHECK-EMPTY: +; CHECK-NEXT: outsideonly: +; CHECK-NEXT: store i32 41, i32* %b, 
align 4 +; CHECK-NEXT: store i32 42, i32* %A, align 4 +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: codeRepl: +; CHECK-NEXT: call void @foo.region_start(i32* %a, i32* %b, i32* %A) +; CHECK-NEXT: br label %region_start.split +; CHECK-EMPTY: +; CHECK-NEXT: region_start: +; CHECK-NEXT: store i32 43, i32* %a, align 4 +; CHECK-NEXT: store i32 44, i32* %b, align 4 +; CHECK-NEXT: store i32 45, i32* %A, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) +; CHECK-NEXT: store i32 46, i32* %B, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) +; CHECK-NEXT: br label %region_start.split +; CHECK-EMPTY: +; CHECK-NEXT: region_start.split: +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: return: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %A) +; CHECK-NEXT: ret void +; CHECK-NEXT: } + + +; CHECK-LABEL: define internal void @foo.region_start(i32* %a, i32* %b, i32* %A) { +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: %B = alloca i32, align 4 +; CHECK-NEXT: br label %region_start +; CHECK-EMPTY: +; CHECK-NEXT: region_start: +; CHECK-NEXT: store i32 43, i32* %a, align 4 +; CHECK-NEXT: store i32 44, i32* %b, align 4 +; CHECK-NEXT: store i32 45, i32* %A, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) +; CHECK-NEXT: store i32 46, i32* %B, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) +; CHECK-NEXT: br label %region_start.split.exitStub +; CHECK-EMPTY: +; CHECK-NEXT: region_start.split.exitStub: +; CHECK-NEXT: ret void +; CHECK-NEXT: } + + + + + + + + +declare void @llvm.lifetime.start.p0i32(i64, i32* nocapture) +declare void @llvm.lifetime.end.p0i32(i64, i32* nocapture) + +define void @foo(i1 %c) { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %A = alloca i32, align 4 + %B = alloca i32, align 4 + call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %A) + br i1 %c, label 
%region_start, label %outsideonly + +outsideonly: + store i32 41, i32* %b + store i32 42, i32* %A + br label %return + +region_start: + store i32 43, i32* %a + store i32 44, i32* %b + store i32 45, i32* %A + call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) + store i32 46, i32* %B + call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) + br label %return + +return: + call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %A) + ret void +} From 084a66eb15f0c8d24bc6850129bc1944628b9dd4 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 01:18:12 -0600 Subject: [PATCH 044/130] Refactor swift args --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 105 +++++++++----------- 1 file changed, 46 insertions(+), 59 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 8ebce0ecd32c5..d73ae41b431dd 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1009,6 +1009,15 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, co newFunction->addFnAttr(Attr); } + // Set swifterror parameter attributes. + if (!AggregateArgs) { + for (auto&& P : enumerate(inputs)) { + if (P.value()->isSwiftError()) + newFunction->addParamAttr(P.index(), Attribute::SwiftError); + } + } + + // Set names for input and output arguments. if (!AggregateArgs) { @@ -1553,6 +1562,16 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + // Collect objects which are inputs to the extraction region and also + // referenced by lifetime start markers within it. The effects of these + // markers must be replicated in the calling function to prevent the stack + // coloring pass from merging slots which store input objects. 
+ ValueSet LifetimesStart; + eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); + + + + @@ -1642,15 +1661,15 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - SmallVector SwiftErrorArgs; + // SmallVector SwiftErrorArgs; std::vector ReloadOutputs; std::vector Reloads; if (!AggregateArgs) { for (Value* input : inputs) { params.push_back(input); - if (input->isSwiftError()) - SwiftErrorArgs.push_back(ArgNo); - ++ArgNo; + // if (input->isSwiftError()) + // SwiftErrorArgs.push_back(ArgNo); + // ++ArgNo; } @@ -1676,6 +1695,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + // Emit the call to the function + CallInst *call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : "",codeReplacer); + + + // Set swifterror parameter attributes. + if (!AggregateArgs) { + for (auto&& P : enumerate(inputs)) { + if (P.value()->isSwiftError()) + call->addParamAttr(P.index(), Attribute::SwiftError); + } + } + + //// Connect call replacement to CFG //////////////////////////////////////////////////////////////////////// @@ -1700,19 +1732,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { - - for (auto&& P : enumerate(inputs)) { VMap[P.value()] = NewValues[P.index()]; } -#if 0 - for (auto&& S : SinkingCands) { - VMap[S] = S; - } -#endif - - CallInst* call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); - // Add debug location to the new call, if the original function has debug @@ -1723,13 +1745,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) call->setDebugLoc(DL); } - codeReplacer->getInstList().push_back(call); + //codeReplacer->getInstList().push_back(call); + - // Set swifterror parameter attributes. 
- for (unsigned SwiftErrArgNo : SwiftErrorArgs) { - call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - } Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); unsigned FirstOut = inputs.size(); @@ -1753,8 +1771,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; - } - else { + } else { Output = ReloadOutputs[i]; } ReloadAddress[outputs[i]] = Output; @@ -1771,8 +1788,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ReloadRepls.push_back(load); // Remove all PHIs; will need to be recreated by SSAUpdater; - } - else { + } else { std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { Instruction* inst = cast(Users[u]); @@ -2078,20 +2094,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - auto* BranchI = BranchInst::Create(header, newFuncRoot); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); - - - - - - // Collect objects which are inputs to the extraction region and also - // referenced by lifetime start markers within it. The effects of these - // markers must be replicated in the calling function to prevent the stack - // coloring pass from merging slots which store input objects. - ValueSet LifetimesStart; - eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); - @@ -2109,40 +2111,25 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - - - - Module *M = newFunction->getParent(); - LLVMContext &Context = M->getContext(); - // const DataLayout &DL = M->getDataLayout(); - - - - - // Emit the call to the function - CallInst * call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? 
"targetBlock" : "",codeReplacer); - + auto* BranchI = BranchInst::Create(header, newFuncRoot); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); // Add debug location to the new call, if the original function has debug // info. In that case, the terminator of the entry block of the extracted // function contains the first debug location of the extracted function, // set in extractCodeRegion. - if (codeReplacer->getParent()->getSubprogram()) { + if (oldFunction->getSubprogram()) { if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) call->setDebugLoc(DL); } - // codeReplacer->getInstList().push_back(call); + + + - // Set swifterror parameter attributes. - for (unsigned SwiftErrArgNo : SwiftErrorArgs) { // TOOD: Move to constructFunction - call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - } - // SmallVector AfterCall; ValueToValueMapTy VMap; From 09abd45f3f6d419ddd4701b3f36a1cf0453bf67c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 01:23:15 -0600 Subject: [PATCH 045/130] weights --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 24 +++++++++++---------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index d73ae41b431dd..72c13cd84e165 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -2132,7 +2132,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - ValueToValueMapTy VMap; + // Reload the outputs passed in by reference. 
@@ -2143,17 +2143,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - //Value *GEP = Builder.CreateGEP(StructArgTy, Struct, Idx, Twine("gep_reload_") + outputs[i]->getName()); - GetElementPtrInst *GEP = GetElementPtrInst::Create(StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + GetElementPtrInst *GEP = GetElementPtrInst::Create(StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; } else { Output = ReloadOutputs[i]; } LoadInst *load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload",codeReplacer); - // auto Output = MakeReloadAddress(i); - // LoadInst *load = Builder.CreateLoad(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload"); - + Reloads.push_back(load); std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { @@ -2344,10 +2341,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, call); - // TODO: ByCopy - // Update the branch weights for the exit block. - if (BFI && NumExitBlocks > 1) - calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); + // Loop over all of the PHI nodes in the header and exit blocks, and change @@ -2378,9 +2372,17 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } - fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *call); + } + // Update the branch weights for the exit block. + if (BFI && NumExitBlocks > 1) + calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); + + + + fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *call); + // Mark the new function `noreturn` if applicable. 
Terminators which resume // exception propagation are treated as returning instructions. This is to // avoid inserting traps after calls to outlined functions which unwind. From 1e1e248eca7074f4630d3628bd7a4e807124cb25 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 01:48:33 -0600 Subject: [PATCH 046/130] move code before inserting replacement --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 54 +++++++++++++++------ 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 72c13cd84e165..225ce424a670a 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1608,13 +1608,37 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + + + //// Copy/Move code //////////////////////////////////////////////////////////////////////////// + + // Determine position for the replacement code + auto ReplIP = header; + if (!KeepOldBlocks) { + while (ReplIP && Blocks.count(ReplIP)) { + ReplIP = ReplIP->getNextNode(); + } + } + + + if (KeepOldBlocks) { + + } else { + moveCodeToFunction(newFunction); + } + + + //// Codegen newFunction call replacement ////////////////////////////////////////////// // This takes place of the original loop - BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); + BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, ReplIP); BasicBlock *AllocaBlock = &oldFunction->front(); + + + // Update the entry count of the function. 
if (BFI) { auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); @@ -1627,7 +1651,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - // Add inputs as params, or to be filled into the struct unsigned ArgNo = 0; std::vector params; @@ -1659,17 +1682,15 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } + - // SmallVector SwiftErrorArgs; + std::vector ReloadOutputs; std::vector Reloads; if (!AggregateArgs) { for (Value* input : inputs) { params.push_back(input); - // if (input->isSwiftError()) - // SwiftErrorArgs.push_back(ArgNo); - // ++ArgNo; } @@ -1714,7 +1735,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - // Rewrite branches to basic blocks outside of the loop to new dummy blocks // within the new function. This must be done before we lose track of which // blocks were originally in the code region. @@ -2093,9 +2113,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - - - + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { Value* RewriteVal = NewValues[i]; @@ -2129,7 +2147,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - + @@ -2236,6 +2254,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + + + + // Store the arguments right after the definition of output value. // This should be proceeded after creating exit stubs to be ensure that invoke // result restore will be placed in the outlined function. @@ -2324,21 +2346,23 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, break; } + + + // Insert lifetime markers around the reloads of any output values. The // allocas output values are stored in are only in-use in the codeRepl block. 
insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); - // CallInst* TheCall =call; - moveCodeToFunction(newFunction); + + // TODO: ByCopy // Replicate the effects of any lifetime start/end markers which referenced // input objects in the extraction region by placing markers around the call. - insertLifetimeMarkersSurroundingCall( - oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, call); + insertLifetimeMarkersSurroundingCall(oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, call); From 6bfcd3e5d52a44b228ce4381306874d67eb25f89 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 01:53:42 -0600 Subject: [PATCH 047/130] copy code move --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 93 +++++++++------------ 1 file changed, 39 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 225ce424a670a..c3e6a32252f53 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1547,8 +1547,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } } - // assert((SinkingCands.empty() || FirstSunkAlloca) && "Did not expect a sink candidate without any allocas"); - + if (!HoistingCands.empty()) { @@ -1606,6 +1605,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, NewValues.push_back(RewriteVal); } + for (auto&& P : enumerate(inputs)) { + VMap[P.value()] = NewValues[P.index()]; + } + @@ -1623,6 +1626,38 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { + for (BasicBlock* Block : Blocks) { + BasicBlock* CBB = CloneBasicBlock(Block, VMap, {}, newFunction /*, nullptr, &DIFinder*/); + + // Add basic block mapping. + VMap[Block] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. 
Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (Block->hasAddressTaken()) { + Constant* OldBBAddr = BlockAddress::get(oldFunction, Block); + VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); + } + + // Note return instructions for the caller. + // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) + // Returns.push_back(RI); + + + for (auto&& P : CBB->phis()) { + auto NumIncoming = P.getNumIncomingValues(); + for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { + if (Blocks.count(P.getIncomingBlock(Idx))) + continue; + P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/ false); + } + } + } + } else { moveCodeToFunction(newFunction); } @@ -1752,32 +1787,15 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { - for (auto&& P : enumerate(inputs)) { - VMap[P.value()] = NewValues[P.index()]; - } - - - // Add debug location to the new call, if the original function has debug - // info. In that case, the terminator of the entry block of the extracted - // function contains the first debug location of the extracted function, - // set in extractCodeRegion. - if (codeReplacer->getParent()->getSubprogram()) { - if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) - call->setDebugLoc(DL); - } - //codeReplacer->getInstList().push_back(call); + - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); DenseMap ReloadReplacements; SmallVector ReloadRepls; DenseMap ReloadAddress; - // DenseMap SpillAddress; + // Reload the outputs passed in by reference. 
@@ -1803,7 +1821,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { auto OrigOut = outputs[i]; - //VMap[Out] = load; ReloadReplacements[OrigOut] = load; ReloadRepls.push_back(load); @@ -1825,38 +1842,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, codeReplacer, 0, codeReplacer); - // auto newFuncIt = newFunction->front().getIterator(); - for (BasicBlock* Block : Blocks) { - BasicBlock* CBB = CloneBasicBlock(Block, VMap, {}, newFunction /*, nullptr, &DIFinder*/); - - // Add basic block mapping. - VMap[Block] = CBB; - - // It is only legal to clone a function if a block address within that - // function is never referenced outside of the function. Given that, we - // want to map block addresses from the old function to block addresses in - // the clone. (This is different from the generic ValueMapper - // implementation, which generates an invalid blockaddress when - // cloning a function.) - if (Block->hasAddressTaken()) { - Constant* OldBBAddr = BlockAddress::get(oldFunction, Block); - VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); - } - - // Note return instructions for the caller. 
- // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) - // Returns.push_back(RI); - - - for (auto&& P : CBB->phis()) { - auto NumIncoming = P.getNumIncomingValues(); - for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { - if (Blocks.count(P.getIncomingBlock(Idx))) - continue; - P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/ false); - } - } - } From 740d3f245a5d4005f9272b47d1b7bdfbcc2a431d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 02:46:10 -0600 Subject: [PATCH 048/130] exit block code split --- .../llvm/Transforms/Utils/CodeExtractor.h | 2 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 72 ++++++++++++------- 2 files changed, 49 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 2c80627bad288..d3346edb04eae 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -106,7 +106,6 @@ class CodeExtractorAnalysisCache { // Mapping from the original exit blocks, to the new blocks inside // the function. SmallVector OldTargets; - // SmallVector > OldExitingEdges; SmallPtrSet ExitBlocks; // Suffix to use when creating extracted function (appended to the original @@ -119,6 +118,7 @@ class CodeExtractorAnalysisCache { + public: /// Create a code extractor for a sequence of blocks. /// diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index c3e6a32252f53..c9143a7a00ec9 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1411,6 +1411,7 @@ void CodeExtractor::recomputeExitBlocks() { + Function * CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs, bool KeepOldBlocks ) { @@ -2139,7 +2140,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Reload the outputs passed in by reference. 
- // Builder.SetInsertPoint(codeReplacer); for (unsigned i = 0, e = outputs.size(); i != e; ++i) { Value *Output = nullptr; if (AggregateArgs) { @@ -2165,36 +2165,45 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } - // Now we can emit a switch statement using the call as a value. - SwitchInst *TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); - // Since there may be multiple exits from the original region, make the new - // function return an unsigned, switch on that number. This loop iterates - // over all of the blocks in the extracted region, updating any terminator - // instructions in the to-be-extracted region that branch to blocks that are - // not in the region to be extracted. + + + std::map ExitBlockMap; + //SmallVector ExitBlockSwitchIdx; + SmallDenseMap ExitBlockSwitchIdx; + SmallVector Orlder; - // Iterate over the previously collected targets, and create new blocks inside - // the function to branch to. - unsigned switchVal = 0; - for (BasicBlock *OldTarget : OldTargets) { + for (BasicBlock* OldTarget : OldTargets) { if (Blocks.count(OldTarget)) continue; - BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) - continue; - // If we don't already have an exit stub for this non-extracted - // destination, create one now! - NewTarget = BasicBlock::Create(Context, - OldTarget->getName() + ".exitStub", - newFunction); + auto Added = ExitBlockSwitchIdx.insert({ OldTarget, ExitBlockSwitchIdx.size() }); + if (Added.second) + Orlder.push_back(OldTarget); + } + + for (auto OldTarget : OldTargets) { + BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; + if (!NewTarget) { + // If we don't already have an exit stub for this non-extracted + // destination, create one now! 
+ NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", + newFunction); + } VMap[OldTarget] = NewTarget; - unsigned SuccNum = switchVal++; + } + + + for (auto &&P:ExitBlockMap){ + auto OldTarget = P.first; + auto NewTarget = P.second; + auto SuccNum = ExitBlockSwitchIdx[OldTarget]; + + + auto &Context = Blocks.front()->getContext(); Value *brVal = nullptr; assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); switch (NumExitBlocks) { @@ -2208,9 +2217,21 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, break; } + ReturnInst::Create(Context, brVal, NewTarget); + } + + + // Now we can emit a switch statement using the call as a value. + SwitchInst *TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + + + for (auto &&P: Orlder) { + auto OldTarget = P; + auto SuccNum =ExitBlockSwitchIdx[OldTarget]; - // Update the switch instruction. TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), SuccNum), OldTarget); @@ -2218,6 +2239,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + + + for (BasicBlock* Block : Blocks) { Instruction* TI = Block->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { From 657923d243a2c70c11a8a7a4a872d73358686051 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 02:54:15 -0600 Subject: [PATCH 049/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index c9143a7a00ec9..bd5915c746407 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -2185,21 +2185,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, for (auto OldTarget : OldTargets) { BasicBlock*& NewTarget = 
ExitBlockMap[OldTarget]; - if (!NewTarget) { + if (NewTarget) + continue; + // If we don't already have an exit stub for this non-extracted // destination, create one now! - NewTarget = BasicBlock::Create(Context, + NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", newFunction); - } + VMap[OldTarget] = NewTarget; - } - - - for (auto &&P:ExitBlockMap){ - auto OldTarget = P.first; - auto NewTarget = P.second; + // auto OldTarget = P.first; + // auto NewTarget = P.second; auto SuccNum = ExitBlockSwitchIdx[OldTarget]; From 6c34205255e31dfb6d08eefabc4a384797b6ae17 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 02:58:15 -0600 Subject: [PATCH 050/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 37 +++++++++++---------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index bd5915c746407..1bdaf5e7683c1 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -438,7 +438,7 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { } // Now add the old exit block to the outline region. 
Blocks.insert(CommonExitBlock); - OldTargets.push_back(NewExitBlock); + //OldTargets.push_back(NewExitBlock); return CommonExitBlock; } @@ -1558,9 +1558,27 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // MoveOrCopyInst(cast(II), HoistToBlock, TI->getIterator()); cast(II)->moveBefore(TI); } + recomputeExitBlocks(); } + std::map ExitBlockMap; + SmallDenseMap ExitBlockSwitchIdx; + SmallVector Orlder; + + for (BasicBlock* OldTarget : OldTargets) { + if (Blocks.count(OldTarget)) + continue; + + auto Added = ExitBlockSwitchIdx.insert({ OldTarget, ExitBlockSwitchIdx.size() }); + if (Added.second) + Orlder.push_back(OldTarget); + } + + + + + // Collect objects which are inputs to the extraction region and also // referenced by lifetime start markers within it. The effects of these @@ -2169,20 +2187,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - std::map ExitBlockMap; - //SmallVector ExitBlockSwitchIdx; - SmallDenseMap ExitBlockSwitchIdx; - SmallVector Orlder; - - for (BasicBlock* OldTarget : OldTargets) { - if (Blocks.count(OldTarget)) - continue; - - auto Added = ExitBlockSwitchIdx.insert({ OldTarget, ExitBlockSwitchIdx.size() }); - if (Added.second) - Orlder.push_back(OldTarget); - } - for (auto OldTarget : OldTargets) { BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; if (NewTarget) @@ -2196,8 +2200,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, VMap[OldTarget] = NewTarget; - // auto OldTarget = P.first; - // auto NewTarget = P.second; + auto SuccNum = ExitBlockSwitchIdx[OldTarget]; From 1f9c10b1e8147734e31d831c796d6c5cfa2a855a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 02:59:57 -0600 Subject: [PATCH 051/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 72 +++++++++++---------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 
1bdaf5e7683c1..8b734ee550521 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1682,6 +1682,44 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } + if (!KeepOldBlocks) { + for (auto OldTarget : OldTargets) { + BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; + if (NewTarget) + continue; + + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", + newFunction); + + VMap[OldTarget] = NewTarget; + + + auto SuccNum = ExitBlockSwitchIdx[OldTarget]; + + + auto& Context = Blocks.front()->getContext(); + Value* brVal = nullptr; + assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); + switch (NumExitBlocks) { + case 0: + case 1: break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } + + + ReturnInst::Create(Context, brVal, NewTarget); + } + } + + //// Codegen newFunction call replacement ////////////////////////////////////////////// @@ -2187,40 +2225,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - for (auto OldTarget : OldTargets) { - BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) - continue; - - // If we don't already have an exit stub for this non-extracted - // destination, create one now! - NewTarget = BasicBlock::Create(Context, - OldTarget->getName() + ".exitStub", - newFunction); - - VMap[OldTarget] = NewTarget; - - - auto SuccNum = ExitBlockSwitchIdx[OldTarget]; - - - auto &Context = Blocks.front()->getContext(); - Value *brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: break; // No value needed. 
- case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); - break; - } - - - ReturnInst::Create(Context, brVal, NewTarget); - } // Now we can emit a switch statement using the call as a value. From 3d6f543e4248ad7f7a23e495695883dea7546ef4 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 03:04:11 -0600 Subject: [PATCH 052/130] Copy using common switch gen --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 37 +++++++++------------ 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 8b734ee550521..9d7a305b5687d 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1682,7 +1682,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } - if (!KeepOldBlocks) { + for (auto OldTarget : OldTargets) { BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; if (NewTarget) @@ -1717,7 +1717,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ReturnInst::Create(Context, brVal, NewTarget); } - } + @@ -1844,11 +1844,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { - - - - - DenseMap ReloadReplacements; SmallVector ReloadRepls; DenseMap ReloadAddress; @@ -1902,7 +1897,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - +#if 0 // Since there may be multiple exits from the original region, make the new // function return an unsigned, switch on that number. 
This loop iterates // over all of the blocks in the extracted region, updating any terminator @@ -1954,6 +1949,18 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, SuccNum), OldTarget); } +#endif + + for (auto &&P: Orlder) { + auto OldTarget = P; + auto SuccNum =ExitBlockSwitchIdx[OldTarget]; + + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), + OldTarget); + } + + @@ -2148,15 +2155,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } else { - - - - - - - - - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { Value* RewriteVal = NewValues[i]; @@ -2170,7 +2168,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - auto* BranchI = BranchInst::Create(header, newFuncRoot); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); @@ -2191,10 +2188,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - - - - // Reload the outputs passed in by reference. for (unsigned i = 0, e = outputs.size(); i != e; ++i) { Value *Output = nullptr; From 895d538370e2f121a246b9c6064bde9cf26608b1 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 03:06:22 -0600 Subject: [PATCH 053/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 91 ++++++++------------- 1 file changed, 35 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 9d7a305b5687d..d445583bdf9c1 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1821,6 +1821,37 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } + if (!KeepOldBlocks) { + // Reload the outputs passed in by reference. 
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value* Output = nullptr; + if (AggregateArgs) { + Value* Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst* GEP = GetElementPtrInst::Create(StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } + else { + Output = ReloadOutputs[i]; + } + LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); + + Reloads.push_back(load); + std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction* inst = cast(Users[u]); + if (!KeepOldBlocks) { + if (!Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); + } + } + } + } + + + //// Connect call replacement to CFG //////////////////////////////////////////////////////////////////////// @@ -1888,6 +1919,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } + + // Now we can emit a switch statement using the call as a value. SwitchInst* TheSwitch = SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), @@ -1897,60 +1930,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, -#if 0 - // Since there may be multiple exits from the original region, make the new - // function return an unsigned, switch on that number. This loop iterates - // over all of the blocks in the extracted region, updating any terminator - // instructions in the to-be-extracted region that branch to blocks that are - // not in the region to be extracted. - std::map ExitBlockMap; - - // Iterate over the previously collected targets, and create new blocks inside - // the function to branch to. 
- unsigned switchVal = 0; - for (BasicBlock* OldTarget : OldTargets) { - if (Blocks.count(OldTarget)) - continue; - BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) { - // llvm_unreachable("Happens if e.g. switch has multiple edges to target"); - continue; - } - - // If we don't already have an exit stub for this non-extracted - // destination, create one now! - NewTarget = BasicBlock::Create(Context, - OldTarget->getName() + ".exitStub", - newFunction); - VMap[OldTarget] = NewTarget; - unsigned SuccNum = switchVal++; - - Value* brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: break; // No value needed. - case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); - break; - } - - ReturnInst::Create(Context, brVal, NewTarget); - - // auto OldPredecessor = OldTarget->getUniquePredecessor(); - - - - // Update the switch instruction. - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), - SuccNum), - OldTarget); - } -#endif - for (auto &&P: Orlder) { auto OldTarget = P; auto SuccNum =ExitBlockSwitchIdx[OldTarget]; @@ -2187,7 +2166,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - +#if 0 // Reload the outputs passed in by reference. 
for (unsigned i = 0, e = outputs.size(); i != e; ++i) { Value *Output = nullptr; @@ -2213,7 +2192,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } } - +#endif From e90022c498ee6ac3e58e948b1e1220ce5a41bde7 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 03:17:48 -0600 Subject: [PATCH 054/130] Common Reloads --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 61 ++++----------------- 1 file changed, 11 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index d445583bdf9c1..8ab648f946fdd 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1821,7 +1821,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } - if (!KeepOldBlocks) { + // Reload the outputs passed in by reference. for (unsigned i = 0, e = outputs.size(); i != e; ++i) { Value* Output = nullptr; @@ -1839,16 +1839,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); Reloads.push_back(load); - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction* inst = cast(Users[u]); - if (!KeepOldBlocks) { - if (!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); + + if (!KeepOldBlocks) { + std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction* inst = cast(Users[u]); + if (!KeepOldBlocks) { + if (!Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); + } } } } - } + @@ -1875,48 +1878,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { - DenseMap ReloadReplacements; - SmallVector ReloadRepls; - DenseMap 
ReloadAddress; - - - - // Reload the outputs passed in by reference. - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value* Output = nullptr; - if (AggregateArgs) { - Value* Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst* GEP = GetElementPtrInst::Create( - StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } else { - Output = ReloadOutputs[i]; - } - ReloadAddress[outputs[i]] = Output; - - - LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); - Reloads.push_back(load); - - - if (KeepOldBlocks) { - auto OrigOut = outputs[i]; - ReloadReplacements[OrigOut] = load; - ReloadRepls.push_back(load); - - // Remove all PHIs; will need to be recreated by SSAUpdater; - } else { - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction* inst = cast(Users[u]); - if (!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); - } - } - } From a0dbb9ac78fa1e7d9e8af51c4effdb631f807ca2 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 03:19:48 -0600 Subject: [PATCH 055/130] Reloads update --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 29 ++++++++++----------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 8ab648f946fdd..1e7e24879c96f 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1837,26 +1837,12 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Output = ReloadOutputs[i]; } LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); - Reloads.push_back(load); - 
- if (!KeepOldBlocks) { - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction* inst = cast(Users[u]); - if (!KeepOldBlocks) { - if (!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); - } - } - } } - - //// Connect call replacement to CFG //////////////////////////////////////////////////////////////////////// @@ -1877,11 +1863,24 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - if (KeepOldBlocks) { + if (!KeepOldBlocks) { + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto load = Reloads[i]; + std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction* inst = cast(Users[u]); + if (!KeepOldBlocks) { + if (!Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); + } + } + } + } + if (KeepOldBlocks) { // Now we can emit a switch statement using the call as a value. 
SwitchInst* TheSwitch = SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), From fa5ab1f19005caac200b2e28d47c6fc959dd7529 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 03:22:57 -0600 Subject: [PATCH 056/130] Update inputs belongs to create extracted function --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 23 +++++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 1e7e24879c96f..421349f84c7ec 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1679,6 +1679,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } else { moveCodeToFunction(newFunction); + + + if (!KeepOldBlocks) { + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value* RewriteVal = NewValues[i]; + + std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); + for (User* use : Users) + if (Instruction* inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } + } } @@ -1863,6 +1876,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + if (!KeepOldBlocks) { for (unsigned i = 0, e = outputs.size(); i != e; ++i) { auto load = Reloads[i]; @@ -2094,15 +2108,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } else { - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value* RewriteVal = NewValues[i]; - - std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User* use : Users) - if (Instruction* inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } From c7f9480f35d9b93989ba57333d96877c3fb1f5f6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 03:25:56 -0600 Subject: [PATCH 057/130] switch --- 
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 96 ++++----------------- 1 file changed, 18 insertions(+), 78 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 421349f84c7ec..04dad9792d510 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1855,6 +1855,24 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + // Now we can emit a switch statement using the call as a value. + SwitchInst *TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + + + for (auto &&P: Orlder) { + auto OldTarget = P; + auto SuccNum =ExitBlockSwitchIdx[OldTarget]; + + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), + OldTarget); + } + + + + //// Connect call replacement to CFG //////////////////////////////////////////////////////////////////////// @@ -1895,29 +1913,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { - // Now we can emit a switch statement using the call as a value. - SwitchInst* TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); - - - - - - for (auto &&P: Orlder) { - auto OldTarget = P; - auto SuccNum =ExitBlockSwitchIdx[OldTarget]; - - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), - SuccNum), - OldTarget); - } - - - - - - @@ -2109,9 +2104,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } else { - - - auto* BranchI = BranchInst::Create(header, newFuncRoot); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); @@ -2130,58 +2122,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - -#if 0 - // Reload the outputs passed in by reference. 
- for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = nullptr; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create(StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } else { - Output = ReloadOutputs[i]; - } - LoadInst *load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload",codeReplacer); - - Reloads.push_back(load); - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast(Users[u]); - if (!KeepOldBlocks) { - if (!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); - } - } - } -#endif - - - - - - - // Now we can emit a switch statement using the call as a value. - SwitchInst *TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); - - - for (auto &&P: Orlder) { - auto OldTarget = P; - auto SuccNum =ExitBlockSwitchIdx[OldTarget]; - - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), - SuccNum), - OldTarget); - } - - - - for (BasicBlock* Block : Blocks) { From e24471619912a8478a5b446a039608179063ba65 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 03:29:44 -0600 Subject: [PATCH 058/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 72 +++++++++------------ 1 file changed, 29 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 04dad9792d510..d804e733ca9b8 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1677,6 +1677,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } + + + + 
} else { moveCodeToFunction(newFunction); @@ -1733,6 +1737,31 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + for (BasicBlock* Block : Blocks) { + Instruction* TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; + BasicBlock* OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock* NewTarget = ExitBlockMap[OldTarget]; + assert(NewTarget && "Unknown target block!"); + + if (!KeepOldBlocks) { + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } else { + VMap[OldTarget] = NewTarget; + } + } + } + + + + + + + //// Codegen newFunction call replacement ////////////////////////////////////////////// @@ -1917,25 +1946,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - for (BasicBlock* Block : Blocks) { - Instruction* TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock* OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock* NewTarget = ExitBlockMap[OldTarget]; - assert(NewTarget && "Unknown target block!"); - - // rewrite the original branch instruction with this new target - // TI->setSuccessor(i, NewTarget); - VMap[OldTarget] = NewTarget; - } - } - - - - // Now that we've done the deed, simplify the switch instruction. 
Type* OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); switch (NumExitBlocks) { @@ -2124,30 +2134,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - for (BasicBlock* Block : Blocks) { - Instruction* TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock* OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock* NewTarget = ExitBlockMap[OldTarget]; - assert(NewTarget && "Unknown target block!"); - - if (!KeepOldBlocks) { - // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); - } else { - VMap[OldTarget] = NewTarget; - } - } - } - - - - - - // Store the arguments right after the definition of output value. // This should be proceeded after creating exit stubs to be ensure that invoke From 96f681ce1b96555e9ee1232b4ca41107128c94a7 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 03:51:31 -0600 Subject: [PATCH 059/130] reload eariler --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 104 ++++++++++---------- 1 file changed, 53 insertions(+), 51 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index d804e733ca9b8..299122d00cd5d 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1757,8 +1757,59 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); + if (!KeepOldBlocks) { + // Store the arguments right after the definition of output value. + // This should be proceeded after creating exit stubs to be ensure that invoke + // result restore will be placed in the outlined function. 
+ Function::arg_iterator OAI = OutputArgBegin; + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto *OutI = dyn_cast(outputs[i]); + if (!OutI) + continue; + + // Find proper insertion point. + BasicBlock::iterator InsertPt; + // In case OutI is an invoke, we insert the store at the beginning in the + // 'normal destination' BB. Otherwise we insert the store right after OutI. + if (auto *InvokeI = dyn_cast(OutI)) + InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); + else if (auto *Phi = dyn_cast(OutI)) + InsertPt = Phi->getParent()->getFirstInsertionPt(); + else + InsertPt = std::next(OutI->getIterator()); + + Instruction *InsertBefore = &*InsertPt; + assert((InsertBefore->getFunction() == newFunction || + Blocks.count(InsertBefore->getParent())) && + "InsertPt should be in new function"); + assert(OAI != newFunction->arg_end() && + "Number of output arguments should match " + "the amount of defined values"); + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), + InsertBefore); + new StoreInst(outputs[i], GEP, InsertBefore); + // Since there should be only one struct argument aggregating + // all the output values, we shouldn't increment OAI, which always + // points to the struct argument, in this case. 
+ } else { + new StoreInst(outputs[i], &*OAI, InsertBefore); + ++OAI; + } + } + + } + @@ -1843,10 +1894,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); + @@ -1932,7 +1980,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, for (unsigned u = 0, e = Users.size(); u != e; ++u) { Instruction* inst = cast(Users[u]); if (!KeepOldBlocks) { - if (!Blocks.count(inst->getParent())) + if (!Blocks.count(inst->getParent()) && inst->getParent()->getParent() == oldFunction) inst->replaceUsesOfWith(outputs[i], load); } } @@ -2133,52 +2181,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - - - // Store the arguments right after the definition of output value. - // This should be proceeded after creating exit stubs to be ensure that invoke - // result restore will be placed in the outlined function. - Function::arg_iterator OAI = OutputArgBegin; - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto *OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; - - // Find proper insertion point. - BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. Otherwise we insert the store right after OutI. 
- if (auto *InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); - - Instruction *InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || - Blocks.count(InsertBefore->getParent())) && - "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), - InsertBefore); - new StoreInst(outputs[i], GEP, InsertBefore); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. - } else { - new StoreInst(outputs[i], &*OAI, InsertBefore); - ++OAI; - } - } - // Now that we've done the deed, simplify the switch instruction. 
Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); switch (NumExitBlocks) { From 20916673daa02ebae4083ab8237a62622787c81c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 03:53:27 -0600 Subject: [PATCH 060/130] check old function --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 299122d00cd5d..e74323f2491d2 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1963,8 +1963,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // The BasicBlock which contains the branch is not in the region // modify the branch target to a new block if (Instruction* I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction && - !Blocks.count(I->getParent())) + if (I->isTerminator() && I->getFunction() == oldFunction) I->replaceUsesOfWith(header, codeReplacer); @@ -1979,10 +1978,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { Instruction* inst = cast(Users[u]); - if (!KeepOldBlocks) { - if (!Blocks.count(inst->getParent()) && inst->getParent()->getParent() == oldFunction) + + if (inst->getParent()->getParent() == oldFunction) inst->replaceUsesOfWith(outputs[i], load); - } } } } From dbacfa1c6de4d81aa68567a27f0bdf64dc71b7fe Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 03:55:29 -0600 Subject: [PATCH 061/130] switch update --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 136 +++++++------------- 1 file changed, 44 insertions(+), 92 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index e74323f2491d2..342d38d279771 100644 --- 
a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1950,50 +1950,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - - //// Connect call replacement to CFG //////////////////////////////////////////////////////////////////////// - - - - // Rewrite branches to basic blocks outside of the loop to new dummy blocks - // within the new function. This must be done before we lose track of which - // blocks were originally in the code region. - std::vector Users(header->user_begin(), header->user_end()); - for (auto& U : Users) // FIXME: KeepOldBlocks? - // The BasicBlock which contains the branch is not in the region - // modify the branch target to a new block - if (Instruction* I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction) - I->replaceUsesOfWith(header, codeReplacer); - - - - - - - if (!KeepOldBlocks) { - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto load = Reloads[i]; - - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction* inst = cast(Users[u]); - - if (inst->getParent()->getParent() == oldFunction) - inst->replaceUsesOfWith(outputs[i], load); - } - } - } - - - - if (KeepOldBlocks) { - - - - // Now that we've done the deed, simplify the switch instruction. 
- Type* OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); switch (NumExitBlocks) { case 0: // There are no successors (the block containing the switch itself), which @@ -2003,12 +1961,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Check if the function should return a value if (OldFnRetTy->isVoidTy()) { ReturnInst::Create(Context, nullptr, TheSwitch); // Return void - } - else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { // return what we have ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); - } - else { + } else { // Otherwise we must have code extracted an unwind or something, just // return whatever we want. ReturnInst::Create(Context, @@ -2034,13 +1990,53 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, TheSwitch->setCondition(call); TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks - 1)); + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); break; } + //// Connect call replacement to CFG //////////////////////////////////////////////////////////////////////// + + + + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector Users(header->user_begin(), header->user_end()); + for (auto& U : Users) // FIXME: KeepOldBlocks? 
+ // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (Instruction* I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction) + I->replaceUsesOfWith(header, codeReplacer); + + + + + + + if (!KeepOldBlocks) { + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto load = Reloads[i]; + + std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction* inst = cast(Users[u]); + if (inst->getParent()->getParent() == oldFunction) + inst->replaceUsesOfWith(outputs[i], load); + } + } + } + + + + if (KeepOldBlocks) { + + + + for (auto Pred : predecessors(header)) { if (VMap.count(Pred)) @@ -2179,50 +2175,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - // Now that we've done the deed, simplify the switch instruction. - Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); - switch (NumExitBlocks) { - case 0: - // There are no successors (the block containing the switch itself), which - // means that previously this was the last part of the function, and hence - // this should be rewritten as a `ret' - - // Check if the function should return a value - if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, nullptr, TheSwitch); // Return void - } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { - // return what we have - ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); - } else { - // Otherwise we must have code extracted an unwind or something, just - // return whatever we want. - ReturnInst::Create(Context, - Constant::getNullValue(OldFnRetTy), TheSwitch); - } - - TheSwitch->eraseFromParent(); - break; - case 1: - // Only a single destination, change the switch into an unconditional - // branch. 
- BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); - TheSwitch->eraseFromParent(); - break; - case 2: - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), - call, TheSwitch); - TheSwitch->eraseFromParent(); - break; - default: - // Otherwise, make the default destination of the switch instruction be one - // of the other successors. - TheSwitch->setCondition(call); - TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); - // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); - break; - } - From 365ce3c446de3eb89287ff0c4a6cfe0d3c4e4f7e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 03:59:27 -0600 Subject: [PATCH 062/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 55 ++++++++++++--------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 342d38d279771..004db0d506823 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1679,6 +1679,11 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + for (auto Pred : predecessors(header)) { + if (VMap.count(Pred)) + continue; + VMap[Pred] = newFuncRoot; + } } else { @@ -1698,8 +1703,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } - - for (auto OldTarget : OldTargets) { BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; if (NewTarget) @@ -1737,6 +1740,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + for (BasicBlock* Block : Blocks) { Instruction* TI = Block->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { @@ -1757,6 +1761,30 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } + + + + if (KeepOldBlocks) { + for (Instruction* II : AdditionalRemap) + RemapInstruction(II, VMap, RF_NoModuleLevelChanges); + + 
// Loop over all of the instructions in the new function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (BasicBlock* Block : Blocks) { + WeakTrackingVH NewBlock = VMap.lookup(Block); + if (!NewBlock) { + continue; + } + BasicBlock& Y = cast(*NewBlock); + + // Loop over all instructions, fixing each one as we find it... + + for (Instruction& II : Y) + RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); + } + } + + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); unsigned FirstOut = inputs.size(); if (!AggregateArgs) @@ -2038,31 +2066,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - for (auto Pred : predecessors(header)) { - if (VMap.count(Pred)) - continue; - VMap[Pred] = newFuncRoot; - } - - - for (Instruction* II : AdditionalRemap) - RemapInstruction(II, VMap, RF_NoModuleLevelChanges); - - // Loop over all of the instructions in the new function, fixing up operand - // references as we go. This uses VMap to do all the hard work. - for (BasicBlock* Block : Blocks) { - WeakTrackingVH NewBlock = VMap.lookup(Block); - if (!NewBlock) { - continue; - } - BasicBlock& Y = cast(*NewBlock); - // Loop over all instructions, fixing each one as we find it... 
- for (Instruction& II : Y) - RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); - } + From 5c86c35273cbe56f05e41591053f9b94b60dbb2e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 04:01:46 -0600 Subject: [PATCH 063/130] SSA --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 510 ++++++++++---------- 1 file changed, 255 insertions(+), 255 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 004db0d506823..c28d91e8178b7 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1413,18 +1413,18 @@ void CodeExtractor::recomputeExitBlocks() { Function * -CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - ValueSet &inputs, ValueSet &outputs, bool KeepOldBlocks ) { +CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, + ValueSet& inputs, ValueSet& outputs, bool KeepOldBlocks) { if (!isEligible()) - return nullptr; + return nullptr; // Assumption: this is a single-entry code region, and the header is the first // block in the region. - BasicBlock *header = *Blocks.begin(); - Function *oldFunction = header->getParent(); + BasicBlock* header = *Blocks.begin(); + Function* oldFunction = header->getParent(); Module* M = oldFunction->getParent(); - LLVMContext &Context = M->getContext(); + LLVMContext& Context = M->getContext(); const DataLayout& DL = M->getDataLayout(); @@ -1442,20 +1442,20 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Calculate the entry frequency of the new function before we change the root // block. 
BlockFrequency EntryFreq; - DenseMap ExitWeights; + DenseMap ExitWeights; if (BFI) { assert(BPI && "Both BPI and BFI are required to preserve profile info"); - for (BasicBlock *Pred : predecessors(header)) { + for (BasicBlock* Pred : predecessors(header)) { if (Blocks.count(Pred)) continue; EntryFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); } - for (BasicBlock *Succ : ExitBlocks) { - for (BasicBlock *Block : predecessors(Succ)) { + for (BasicBlock* Succ : ExitBlocks) { + for (BasicBlock* Block : predecessors(Succ)) { if (!Blocks.count(Block)) continue; - BlockFrequency &BF = ExitWeights[Succ]; + BlockFrequency& BF = ExitWeights[Succ]; BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); } } @@ -1480,7 +1480,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ValueSet SinkingCands, HoistingCands; - BasicBlock *CommonExit = nullptr; + BasicBlock* CommonExit = nullptr; findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); assert(HoistingCands.empty() || CommonExit); @@ -1499,7 +1499,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Construct new function based on inputs/outputs & add allocas for all defs. - Function *newFunction = constructFunctionDeclaration(inputs, outputs, header); + Function* newFunction = constructFunctionDeclaration(inputs, outputs, header); @@ -1508,19 +1508,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // The new function needs a root node because other nodes can branch to the // head of the region, but the entry node of a function cannot have preds. 
- BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); + BasicBlock* newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); ValueToValueMapTy VMap; - + SmallVector AdditionalRemap; - auto MoveOrCopyInst = [KeepOldBlocks](Instruction *I, BasicBlock *IB, BasicBlock:: iterator IP) -> Instruction * { + auto MoveOrCopyInst = [KeepOldBlocks](Instruction* I, BasicBlock* IB, BasicBlock::iterator IP) -> Instruction* { if (KeepOldBlocks) { - auto AI= I->clone(); + auto AI = I->clone(); AI->setName(I->getName()); - IB->getInstList().insert( IP, AI); + IB->getInstList().insert(IP, AI); return AI; - } + } I->moveBefore(*IB, IP); return I; }; @@ -1532,7 +1532,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, for (auto* II : SinkingCands) { if (!isa(II)) { - auto New = MoveOrCopyInst(cast(II),newFuncRoot, newFuncRoot->getFirstInsertionPt()); + auto New = MoveOrCopyInst(cast(II), newFuncRoot, newFuncRoot->getFirstInsertionPt()); if (KeepOldBlocks) { AdditionalRemap.push_back(New); VMap[II] = New; @@ -1540,30 +1540,30 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } for (auto* II : SinkingCands) { - if (auto* AI = dyn_cast(II)) { - AI = cast( MoveOrCopyInst(AI,newFuncRoot, newFuncRoot->getFirstInsertionPt())); + if (auto* AI = dyn_cast(II)) { + AI = cast(MoveOrCopyInst(AI, newFuncRoot, newFuncRoot->getFirstInsertionPt())); if (KeepOldBlocks) { AdditionalRemap.push_back(AI); VMap[II] = AI; } } } - + if (!HoistingCands.empty()) { auto* HoistToBlock = findOrCreateBlockForHoisting(CommonExit); Instruction* TI = HoistToBlock->getTerminator(); for (auto* II : HoistingCands) { - // MoveOrCopyInst(cast(II), HoistToBlock, TI->getIterator()); + // MoveOrCopyInst(cast(II), HoistToBlock, TI->getIterator()); cast(II)->moveBefore(TI); } recomputeExitBlocks(); } - std::map ExitBlockMap; - SmallDenseMap ExitBlockSwitchIdx; + std::map ExitBlockMap; + SmallDenseMap 
ExitBlockSwitchIdx; SmallVector Orlder; for (BasicBlock* OldTarget : OldTargets) { @@ -1594,13 +1594,13 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - StructType *StructTy = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) + StructType* StructTy = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) StructTy = cast(newFunction->getArg(0)->getType()); - - - + + + // Create an iterator to name all of the arguments we inserted. Function::arg_iterator AI = newFunction->arg_begin(); @@ -1623,7 +1623,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, NewValues.push_back(RewriteVal); } - + for (auto&& P : enumerate(inputs)) { VMap[P.value()] = NewValues[P.index()]; } @@ -1686,7 +1686,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } - } else { + } + else { moveCodeToFunction(newFunction); @@ -1703,141 +1704,143 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } - for (auto OldTarget : OldTargets) { - BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) - continue; + for (auto OldTarget : OldTargets) { + BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; + if (NewTarget) + continue; - // If we don't already have an exit stub for this non-extracted - // destination, create one now! - NewTarget = BasicBlock::Create(Context, - OldTarget->getName() + ".exitStub", - newFunction); + // If we don't already have an exit stub for this non-extracted + // destination, create one now! 
+ NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", + newFunction); - VMap[OldTarget] = NewTarget; + VMap[OldTarget] = NewTarget; - auto SuccNum = ExitBlockSwitchIdx[OldTarget]; + auto SuccNum = ExitBlockSwitchIdx[OldTarget]; - auto& Context = Blocks.front()->getContext(); - Value* brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: break; // No value needed. - case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); - break; - } + auto& Context = Blocks.front()->getContext(); + Value* brVal = nullptr; + assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); + switch (NumExitBlocks) { + case 0: + case 1: break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } - ReturnInst::Create(Context, brVal, NewTarget); - } - + ReturnInst::Create(Context, brVal, NewTarget); + } - for (BasicBlock* Block : Blocks) { - Instruction* TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock* OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock* NewTarget = ExitBlockMap[OldTarget]; - assert(NewTarget && "Unknown target block!"); - - if (!KeepOldBlocks) { - // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); - } else { - VMap[OldTarget] = NewTarget; - } + + for (BasicBlock* Block : Blocks) { + Instruction* TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; 
+ BasicBlock* OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock* NewTarget = ExitBlockMap[OldTarget]; + assert(NewTarget && "Unknown target block!"); + + if (!KeepOldBlocks) { + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } + else { + VMap[OldTarget] = NewTarget; } } + } - - if (KeepOldBlocks) { - for (Instruction* II : AdditionalRemap) - RemapInstruction(II, VMap, RF_NoModuleLevelChanges); - - // Loop over all of the instructions in the new function, fixing up operand - // references as we go. This uses VMap to do all the hard work. - for (BasicBlock* Block : Blocks) { - WeakTrackingVH NewBlock = VMap.lookup(Block); - if (!NewBlock) { - continue; - } - BasicBlock& Y = cast(*NewBlock); - // Loop over all instructions, fixing each one as we find it... + if (KeepOldBlocks) { + for (Instruction* II : AdditionalRemap) + RemapInstruction(II, VMap, RF_NoModuleLevelChanges); - for (Instruction& II : Y) - RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); + // Loop over all of the instructions in the new function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (BasicBlock* Block : Blocks) { + WeakTrackingVH NewBlock = VMap.lookup(Block); + if (!NewBlock) { + continue; } + BasicBlock& Y = cast(*NewBlock); + + // Loop over all instructions, fixing each one as we find it... + + for (Instruction& II : Y) + RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); } + } - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); - if (!KeepOldBlocks) { - // Store the arguments right after the definition of output value. 
- // This should be proceeded after creating exit stubs to be ensure that invoke - // result restore will be placed in the outlined function. - Function::arg_iterator OAI = OutputArgBegin; - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto *OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; - - // Find proper insertion point. - BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. Otherwise we insert the store right after OutI. - if (auto *InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); - - Instruction *InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || - Blocks.count(InsertBefore->getParent())) && - "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), - InsertBefore); - new StoreInst(outputs[i], GEP, InsertBefore); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. - } else { - new StoreInst(outputs[i], &*OAI, InsertBefore); - ++OAI; - } - } + if (!KeepOldBlocks) { + // Store the arguments right after the definition of output value. + // This should be proceeded after creating exit stubs to be ensure that invoke + // result restore will be placed in the outlined function. 
+ Function::arg_iterator OAI = OutputArgBegin; + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto* OutI = dyn_cast(outputs[i]); + if (!OutI) + continue; + + // Find proper insertion point. + BasicBlock::iterator InsertPt; + // In case OutI is an invoke, we insert the store at the beginning in the + // 'normal destination' BB. Otherwise we insert the store right after OutI. + if (auto* InvokeI = dyn_cast(OutI)) + InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); + else if (auto* Phi = dyn_cast(OutI)) + InsertPt = Phi->getParent()->getFirstInsertionPt(); + else + InsertPt = std::next(OutI->getIterator()); + Instruction* InsertBefore = &*InsertPt; + assert((InsertBefore->getFunction() == newFunction || + Blocks.count(InsertBefore->getParent())) && + "InsertPt should be in new function"); + assert(OAI != newFunction->arg_end() && + "Number of output arguments should match " + "the amount of defined values"); + if (AggregateArgs) { + Value* Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst* GEP = GetElementPtrInst::Create( + StructTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), + InsertBefore); + new StoreInst(outputs[i], GEP, InsertBefore); + // Since there should be only one struct argument aggregating + // all the output values, we shouldn't increment OAI, which always + // points to the struct argument, in this case. 
+ } + else { + new StoreInst(outputs[i], &*OAI, InsertBefore); + ++OAI; + } } + } + @@ -1845,8 +1848,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, //// Codegen newFunction call replacement ////////////////////////////////////////////// // This takes place of the original loop - BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, ReplIP); - BasicBlock *AllocaBlock = &oldFunction->front(); + BasicBlock* codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, ReplIP); + BasicBlock* AllocaBlock = &oldFunction->front(); @@ -1866,13 +1869,13 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Add inputs as params, or to be filled into the struct unsigned ArgNo = 0; - std::vector params; + std::vector params; - AllocaInst *Struct = nullptr; - if (AggregateArgs && StructTy) { - std::vector StructValues; + AllocaInst* Struct = nullptr; + if (AggregateArgs && StructTy) { + std::vector StructValues; for (Value* input : inputs) { - StructValues.push_back(input); + StructValues.push_back(input); ++ArgNo; } @@ -1885,31 +1888,31 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, params.push_back(Struct); for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *Idx[2]; + Value* Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = GetElementPtrInst::Create(StructTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + GetElementPtrInst* GEP = GetElementPtrInst::Create(StructTy, Struct, Idx, "gep_" + StructValues[i]->getName()); codeReplacer->getInstList().push_back(GEP); new StoreInst(StructValues[i], GEP, codeReplacer); } } - - - - std::vector ReloadOutputs; - std::vector Reloads; + + + + std::vector ReloadOutputs; + std::vector Reloads; if (!AggregateArgs) { for (Value* input : inputs) { - params.push_back(input); + 
params.push_back(input); } - - // Create allocas for the outputs + + // Create allocas for the outputs for (Value* output : outputs) { AllocaInst* alloca = new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), @@ -1927,7 +1930,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Emit the call to the function - CallInst *call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : "",codeReplacer); + CallInst* call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : "", codeReplacer); // Set swifterror parameter attributes. @@ -1939,88 +1942,90 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } - - // Reload the outputs passed in by reference. - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value* Output = nullptr; - if (AggregateArgs) { - Value* Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst* GEP = GetElementPtrInst::Create(StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } - else { - Output = ReloadOutputs[i]; - } - LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); - Reloads.push_back(load); + + // Reload the outputs passed in by reference. 
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value* Output = nullptr; + if (AggregateArgs) { + Value* Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst* GEP = GetElementPtrInst::Create(StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; } - + else { + Output = ReloadOutputs[i]; + } + LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); + Reloads.push_back(load); + } - // Now we can emit a switch statement using the call as a value. - SwitchInst *TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); + // Now we can emit a switch statement using the call as a value. + SwitchInst* TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); - for (auto &&P: Orlder) { - auto OldTarget = P; - auto SuccNum =ExitBlockSwitchIdx[OldTarget]; - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), - SuccNum), - OldTarget); - } + for (auto&& P : Orlder) { + auto OldTarget = P; + auto SuccNum = ExitBlockSwitchIdx[OldTarget]; + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), + OldTarget); + } - // Now that we've done the deed, simplify the switch instruction. 
- Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); - switch (NumExitBlocks) { - case 0: - // There are no successors (the block containing the switch itself), which - // means that previously this was the last part of the function, and hence - // this should be rewritten as a `ret' - - // Check if the function should return a value - if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, nullptr, TheSwitch); // Return void - } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { - // return what we have - ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); - } else { - // Otherwise we must have code extracted an unwind or something, just - // return whatever we want. - ReturnInst::Create(Context, - Constant::getNullValue(OldFnRetTy), TheSwitch); - } - TheSwitch->eraseFromParent(); - break; - case 1: - // Only a single destination, change the switch into an unconditional - // branch. - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); - TheSwitch->eraseFromParent(); - break; - case 2: - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), - call, TheSwitch); - TheSwitch->eraseFromParent(); - break; - default: - // Otherwise, make the default destination of the switch instruction be one - // of the other successors. - TheSwitch->setCondition(call); - TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); - // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); - break; + // Now that we've done the deed, simplify the switch instruction. 
+ Type* OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy->isVoidTy()) { + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void } + else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + } + else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. + ReturnInst::Create(Context, + Constant::getNullValue(OldFnRetTy), TheSwitch); + } + + TheSwitch->eraseFromParent(); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->eraseFromParent(); + break; + case 2: + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->eraseFromParent(); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. 
+ TheSwitch->setCondition(call); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks - 1)); + break; + } @@ -2045,34 +2050,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - if (!KeepOldBlocks) { - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto load = Reloads[i]; - - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction* inst = cast(Users[u]); - if (inst->getParent()->getParent() == oldFunction) - inst->replaceUsesOfWith(outputs[i], load); - } - } - } - - - if (KeepOldBlocks) { - - - - - - - - - - - - // Must be done after remap SSAUpdater SSA; for (auto P : enumerate(outputs)) { @@ -2101,6 +2079,28 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } + } else { + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto load = Reloads[i]; + + std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction* inst = cast(Users[u]); + if (inst->getParent()->getParent() == oldFunction) + inst->replaceUsesOfWith(outputs[i], load); + } + } + } + + + + if (KeepOldBlocks) { + + + + + + From 20b71503dc1fa1b56f0c2f6f138c0dc7af4cd372 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 04:07:53 -0600 Subject: [PATCH 064/130] outputs stores --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 66 +++------------------ 1 file changed, 7 insertions(+), 59 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index c28d91e8178b7..9e59b3ddf6ff0 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1793,16 +1793,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, 
std::advance(OutputArgBegin, inputs.size()); - if (!KeepOldBlocks) { + // Store the arguments right after the definition of output value. // This should be proceeded after creating exit stubs to be ensure that invoke // result restore will be placed in the outlined function. - Function::arg_iterator OAI = OutputArgBegin; + Function::arg_iterator OAI = OutputArgBegin; for (unsigned i = 0, e = outputs.size(); i != e; ++i) { auto* OutI = dyn_cast(outputs[i]); if (!OutI) continue; + if (KeepOldBlocks) + OutI = cast(VMap.lookup(OutI)); + // Find proper insertion point. BasicBlock::iterator InsertPt; // In case OutI is an invoke, we insert the store at the beginning in the @@ -1828,20 +1831,17 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, GetElementPtrInst* GEP = GetElementPtrInst::Create( StructTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertBefore); - new StoreInst(outputs[i], GEP, InsertBefore); + new StoreInst(OutI, GEP, InsertBefore); // Since there should be only one struct argument aggregating // all the output values, we shouldn't increment OAI, which always // points to the struct argument, in this case. } else { - new StoreInst(outputs[i], &*OAI, InsertBefore); + new StoreInst(OutI, &*OAI, InsertBefore); ++OAI; } } - } - - @@ -2098,60 +2098,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, - - - - - - // Store the arguments right after the definition of output value. - // This should be proceeded after creating exit stubs to be ensure that invoke - // result restore will be placed in the outlined function. - Function::arg_iterator OAI = OutputArgBegin; - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto* OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; - OutI = cast(VMap.lookup(OutI)); - - // Find proper insertion point. - BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. 
Otherwise we insert the store right after OutI. - if (auto* InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto* Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); - - Instruction* InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || - Blocks.count(InsertBefore->getParent())) && - "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value* Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst* GEP = GetElementPtrInst::Create( - StructTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), - InsertBefore); - new StoreInst(OutI, GEP, InsertBefore); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. 
- } - else { - new StoreInst(OutI, &*OAI, InsertBefore); - ++OAI; - } - } - - BasicBlock* HeaderCopy = cast(VMap.lookup(header)); assert(HeaderCopy); auto* BranchI2 = BranchInst::Create(HeaderCopy, newFuncRoot); From a375438b4359d6a6b042e337b23ed5dd36b0d67f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 04:11:52 -0600 Subject: [PATCH 065/130] connect to NewHeader --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 9e59b3ddf6ff0..c5b9becc5f923 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1786,6 +1786,13 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, } } + auto NewHeader=header; + if (KeepOldBlocks) + NewHeader = cast(VMap.lookup(NewHeader)); + assert(NewHeader); + auto* BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); unsigned FirstOut = inputs.size(); @@ -2100,10 +2107,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, - BasicBlock* HeaderCopy = cast(VMap.lookup(header)); - assert(HeaderCopy); - auto* BranchI2 = BranchInst::Create(HeaderCopy, newFuncRoot); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); + if (!oldFunction) { newFunction->viewCFG(); @@ -2111,8 +2115,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, } else { - auto* BranchI = BranchInst::Create(header, newFuncRoot); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); // Add debug location to the new call, if the original function has debug From 52d07c098703745fba6640624a5f7b9bff2686ad Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 04:14:35 -0600 Subject: [PATCH 066/130] WIP 
--- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 34 +++++++++------------ 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index c5b9becc5f923..685823f9fcec8 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1794,6 +1794,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); unsigned FirstOut = inputs.size(); if (!AggregateArgs) @@ -1950,6 +1951,18 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, + // Add debug location to the new call, if the original function has debug + // info. In that case, the terminator of the entry block of the extracted + // function contains the first debug location of the extracted function, + // set in extractCodeRegion. + if (oldFunction->getSubprogram()) { + if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) + call->setDebugLoc(DL); + } + + + + // Reload the outputs passed in by reference. for (unsigned i = 0, e = outputs.size(); i != e; ++i) { Value* Output = nullptr; @@ -2102,30 +2115,11 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, if (KeepOldBlocks) { - - - - - - - - if (!oldFunction) { - newFunction->viewCFG(); - } - } else { - // Add debug location to the new call, if the original function has debug - // info. In that case, the terminator of the entry block of the extracted - // function contains the first debug location of the extracted function, - // set in extractCodeRegion. 
- if (oldFunction->getSubprogram()) { - if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) - call->setDebugLoc(DL); - } - + From fea2bcb77d443a36f2beb53a8e99ee1ee1d0348b Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 04:26:22 -0600 Subject: [PATCH 067/130] lifetime markers for copy --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 46 ++++++++----------- .../extract-block-multiple-exits.ll | 32 +++++++++---- 2 files changed, 40 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 685823f9fcec8..3a674ceff61f6 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1973,8 +1973,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, GetElementPtrInst* GEP = GetElementPtrInst::Create(StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; - } - else { + } else { Output = ReloadOutputs[i]; } LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); @@ -2048,6 +2047,23 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, } + // Insert lifetime markers around the reloads of any output values. The + // allocas output values are stored in are only in-use in the codeRepl block. + insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); + + + + + + + + + // Replicate the effects of any lifetime start/end markers which referenced + // input objects in the extraction region by placing markers around the call. 
+ insertLifetimeMarkersSurroundingCall(M, LifetimesStart.getArrayRef(), {}, call); + + + //// Connect call replacement to CFG //////////////////////////////////////////////////////////////////////// @@ -2122,32 +2138,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, - - - - - - - - // Insert lifetime markers around the reloads of any output values. The - // allocas output values are stored in are only in-use in the codeRepl block. - insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); - - - - - - - - // TODO: ByCopy - // Replicate the effects of any lifetime start/end markers which referenced - // input objects in the extraction region by placing markers around the call. - insertLifetimeMarkersSurroundingCall(oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, call); - - - - - // Loop over all of the PHI nodes in the header and exit blocks, and change // any references to the old incoming edge to be the new incoming edge. for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { diff --git a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll index f40f48eefb0b0..b4c0667b9a58d 100644 --- a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll +++ b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll @@ -10,11 +10,23 @@ ; CHECK-NEXT: br i1 %c0, label %codeRepl, label %exit ; CHECK-EMPTY: ; CHECK-NEXT: codeRepl: +; CHECK-NEXT: %lt.cast = bitcast i32* %a.loc to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast) +; CHECK-NEXT: %lt.cast1 = bitcast i32* %b.loc to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast1) +; CHECK-NEXT: %lt.cast2 = bitcast i32* %c.loc to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast2) +; CHECK-NEXT: %lt.cast3 = bitcast i32* %B.ce.loc to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast3) ; 
CHECK-NEXT: %targetBlock = call i16 @func.region_start(i1 %c1, i1 %c2, i8 %dest, i32* %a.loc, i32* %b.loc, i32* %c.loc, i32* %B.ce.loc) ; CHECK-NEXT: %a.reload = load i32, i32* %a.loc, align 4 ; CHECK-NEXT: %b.reload = load i32, i32* %b.loc, align 4 ; CHECK-NEXT: %c.reload = load i32, i32* %c.loc, align 4 ; CHECK-NEXT: %B.ce.reload = load i32, i32* %B.ce.loc, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast1) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast2) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast3) ; CHECK-NEXT: switch i16 %targetBlock, label %exit0 [ ; CHECK-NEXT: i16 0, label %exiting0.exit_crit_edge ; CHECK-NEXT: i16 1, label %fallback @@ -31,7 +43,7 @@ ; CHECK-NEXT: br i1 %c2, label %exiting0.exit_crit_edge, label %exit0.split ; CHECK-EMPTY: ; CHECK-NEXT: exiting0.exit_crit_edge: -; CHECK-NEXT: %b.merge_with_extracted4 = phi i32 [ %b.reload, %codeRepl ], [ %b, %exiting0 ] +; CHECK-NEXT: %b.merge_with_extracted7 = phi i32 [ %b.reload, %codeRepl ], [ %b, %exiting0 ] ; CHECK-NEXT: br label %exit ; CHECK-EMPTY: ; CHECK-NEXT: exiting1: @@ -47,26 +59,26 @@ ; CHECK-NEXT: unreachable ; CHECK-EMPTY: ; CHECK-NEXT: exit: -; CHECK-NEXT: %A = phi i32 [ 42, %entry ], [ %b.merge_with_extracted4, %exiting0.exit_crit_edge ] +; CHECK-NEXT: %A = phi i32 [ 42, %entry ], [ %b.merge_with_extracted7, %exiting0.exit_crit_edge ] ; CHECK-NEXT: store i32 %A, i32* %arg, align 4 ; CHECK-NEXT: br label %return ; CHECK-EMPTY: ; CHECK-NEXT: exit0.split: -; CHECK-NEXT: %b.merge_with_extracted3 = phi i32 [ %b, %exiting0 ], [ undef, %exiting1 ], [ undef, %exiting1 ] +; CHECK-NEXT: %b.merge_with_extracted6 = phi i32 [ %b, %exiting0 ], [ undef, %exiting1 ], [ undef, %exiting1 ] ; CHECK-NEXT: %B.ce = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ], [ %a, %exiting1 ] ; CHECK-NEXT: br label %exit0 ; CHECK-EMPTY: ; CHECK-NEXT: exit0: ; CHECK-NEXT: 
%B.ce.merge_with_extracted = phi i32 [ %B.ce.reload, %codeRepl ], [ %B.ce, %exit0.split ] -; CHECK-NEXT: %b.merge_with_extracted = phi i32 [ %b.reload, %codeRepl ], [ %b.merge_with_extracted3, %exit0.split ] -; CHECK-NEXT: %a.merge_with_extracted2 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exit0.split ] -; CHECK-NEXT: store i32 %a.merge_with_extracted2, i32* %arg, align 4 +; CHECK-NEXT: %b.merge_with_extracted = phi i32 [ %b.reload, %codeRepl ], [ %b.merge_with_extracted6, %exit0.split ] +; CHECK-NEXT: %a.merge_with_extracted5 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exit0.split ] +; CHECK-NEXT: store i32 %a.merge_with_extracted5, i32* %arg, align 4 ; CHECK-NEXT: store i32 %B.ce.merge_with_extracted, i32* %arg, align 4 ; CHECK-NEXT: br label %after ; CHECK-EMPTY: ; CHECK-NEXT: exit1: -; CHECK-NEXT: %c.merge_with_extracted5 = phi i32 [ %c.reload, %codeRepl ], [ %c, %exiting1 ] -; CHECK-NEXT: %a.merge_with_extracted1 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exiting1 ] +; CHECK-NEXT: %c.merge_with_extracted8 = phi i32 [ %c.reload, %codeRepl ], [ %c, %exiting1 ] +; CHECK-NEXT: %a.merge_with_extracted4 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exiting1 ] ; CHECK-NEXT: br label %after ; CHECK-EMPTY: ; CHECK-NEXT: exit2: @@ -76,8 +88,8 @@ ; CHECK-NEXT: br label %return ; CHECK-EMPTY: ; CHECK-NEXT: after: -; CHECK-NEXT: %a.merge_with_extracted = phi i32 [ %a.merge_with_extracted2, %exit0 ], [ %a.merge_with_extracted1, %exit1 ] -; CHECK-NEXT: %D = phi i32 [ %b.merge_with_extracted, %exit0 ], [ %c.merge_with_extracted5, %exit1 ] +; CHECK-NEXT: %a.merge_with_extracted = phi i32 [ %a.merge_with_extracted5, %exit0 ], [ %a.merge_with_extracted4, %exit1 ] +; CHECK-NEXT: %D = phi i32 [ %b.merge_with_extracted, %exit0 ], [ %c.merge_with_extracted8, %exit1 ] ; CHECK-NEXT: store i32 %a.merge_with_extracted, i32* %arg, align 4 ; CHECK-NEXT: store i32 %D, i32* %arg, align 4 ; CHECK-NEXT: br label %return From 4f0cce81cb505f35c8e39829d8e1a0d44cd24fbc Mon Sep 17 00:00:00 2001 From: 
Michael Kruse Date: Mon, 6 Dec 2021 04:34:58 -0600 Subject: [PATCH 068/130] connect PHIs --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 76 +++++++++------------ 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 3a674ceff61f6..db81e56c19515 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1784,8 +1784,21 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, for (Instruction& II : Y) RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); } + }else{ + // Loop over all of the PHI nodes in the header and exit blocks, and change + // any references to the old incoming edge to be the new incoming edge. + for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { + PHINode* PN = cast(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!Blocks.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); + } + + } + + auto NewHeader=header; if (KeepOldBlocks) NewHeader = cast(VMap.lookup(NewHeader)); @@ -2053,11 +2066,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, - - - - - // Replicate the effects of any lifetime start/end markers which referenced // input objects in the extraction region by placing markers around the call. insertLifetimeMarkersSurroundingCall(M, LifetimesStart.getArrayRef(), {}, call); @@ -2081,8 +2089,26 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, if (I->isTerminator() && I->getFunction() == oldFunction) I->replaceUsesOfWith(header, codeReplacer); + if (!KeepOldBlocks) { + for (BasicBlock* ExitBB : ExitBlocks) + for (PHINode& PN : ExitBB->phis()) { + Value* IncomingCodeReplacerVal = nullptr; + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // Ignore incoming values from outside of the extracted region. 
+ if (!Blocks.count(PN.getIncomingBlock(i))) + continue; - + // Ensure that there is only one incoming value from codeReplacer. + if (!IncomingCodeReplacerVal) { + PN.setIncomingBlock(i, codeReplacer); + IncomingCodeReplacerVal = PN.getIncomingValue(i); + } + else + assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && + "PHI has two incompatbile incoming values from codeRepl"); + } + } + } @@ -2130,44 +2156,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, - if (KeepOldBlocks) { - } else { - - - - - - - // Loop over all of the PHI nodes in the header and exit blocks, and change - // any references to the old incoming edge to be the new incoming edge. - for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { - PHINode* PN = cast(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!Blocks.count(PN->getIncomingBlock(i))) - PN->setIncomingBlock(i, newFuncRoot); - } - - for (BasicBlock* ExitBB : ExitBlocks) - for (PHINode& PN : ExitBB->phis()) { - Value* IncomingCodeReplacerVal = nullptr; - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - // Ignore incoming values from outside of the extracted region. - if (!Blocks.count(PN.getIncomingBlock(i))) - continue; - - // Ensure that there is only one incoming value from codeReplacer. - if (!IncomingCodeReplacerVal) { - PN.setIncomingBlock(i, codeReplacer); - IncomingCodeReplacerVal = PN.getIncomingValue(i); - } - else - assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && - "PHI has two incompatbile incoming values from codeRepl"); - } - } - - - } // Update the branch weights for the exit block. 
if (BFI && NumExitBlocks > 1) From 8b0d1e059fb1b0be1ab88782d4756e682253403a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 04:36:02 -0600 Subject: [PATCH 069/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index db81e56c19515..87ced7ebd2e10 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -2102,8 +2102,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, if (!IncomingCodeReplacerVal) { PN.setIncomingBlock(i, codeReplacer); IncomingCodeReplacerVal = PN.getIncomingValue(i); - } - else + } else assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && "PHI has two incompatbile incoming values from codeRepl"); } @@ -2140,7 +2139,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, SSA.RewriteUseAfterInsertions(U); } } - } else { for (unsigned i = 0, e = outputs.size(); i != e; ++i) { auto load = Reloads[i]; From fc6a7fd6b3d1f1ef438e4ff20587f522b49624c7 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 04:37:31 -0600 Subject: [PATCH 070/130] clang-format --- llvm/include/llvm/Transforms/IPO.h | 4 +- .../llvm/Transforms/Utils/CodeExtractor.h | 44 +- llvm/lib/Transforms/IPO/BlockExtractor.cpp | 20 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1779 ++++++++--------- llvm/tools/llvm-extract/llvm-extract.cpp | 23 +- 5 files changed, 868 insertions(+), 1002 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index e48677088b2ad..d11c27304815d 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -192,11 +192,11 @@ Pass *createSingleLoopExtractorPass(); ModulePass *createBlockExtractorPass(); ModulePass * createBlockExtractorPass(const SmallVectorImpl &BlocksToExtract, - bool 
EraseFunctions, bool KeepOldBlocks =false); + bool EraseFunctions, bool KeepOldBlocks = false); ModulePass * createBlockExtractorPass(const SmallVectorImpl> &GroupsOfBlocksToExtract, - bool EraseFunctions, bool KeepOldBlocks=false); + bool EraseFunctions, bool KeepOldBlocks = false); /// createStripDeadPrototypesPass - This pass removes any function declarations /// (prototypes) that are not used. diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index d3346edb04eae..0e6cda4bd4ab9 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -113,11 +113,7 @@ class CodeExtractorAnalysisCache { // label, if non-empty, otherwise "extracted". std::string Suffix; - - void recomputeExitBlocks(); - - - + void recomputeExitBlocks(); public: /// Create a code extractor for a sequence of blocks. @@ -151,7 +147,8 @@ class CodeExtractorAnalysisCache { /// /// Returns zero when called on a CodeExtractor instance where isEligible /// returns false. - Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, bool KeepOldBlocks = false); + Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + bool KeepOldBlocks = false); /// Perform the extraction, returning the new function and providing an /// interface to see what was categorized as inputs and outputs. @@ -162,13 +159,13 @@ class CodeExtractorAnalysisCache { /// newly outlined function. /// \param Outputs [out] - filled with values marked as outputs to the /// newly outlined function. - /// \param KeepOldBlocks If true, the original instances of the extracted region remain; instead of moving them to the new function they are copied. - /// \returns zero when called on a CodeExtractor instance where isEligible - /// returns false. 
+ /// \param KeepOldBlocks If true, the original instances of the extracted + /// region remain; instead of moving them to the new function they are + /// copied. \returns zero when called on a CodeExtractor instance where + /// isEligible returns false. Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - ValueSet &Inputs, ValueSet &Outputs, bool KeepOldBlocks = false); - - + ValueSet &Inputs, ValueSet &Outputs, + bool KeepOldBlocks = false); /// Verify that assumption cache isn't stale after a region is extracted. /// Returns true when verifier finds errors. AssumptionCache is passed as @@ -243,24 +240,15 @@ class CodeExtractorAnalysisCache { void severSplitPHINodesOfExits(); void splitReturnBlocks(); + void handleParams(Function *oldFunction, Function *newFunction, + const ValueSet &inputs, const ValueSet &outputs); - void handleParams( - Function *oldFunction, Function *newFunction, - const ValueSet &inputs, - const ValueSet &outputs) ; - - - Function *constructFunctionDeclaration(const ValueSet &inputs, const ValueSet &outputs, - BasicBlock *header - ); - - - - - - void canonicalizeCFGForExtraction(BasicBlock *&Header, bool NoExitBlockPHIs); - + Function *constructFunctionDeclaration(const ValueSet &inputs, + const ValueSet &outputs, + BasicBlock *header); + void canonicalizeCFGForExtraction(BasicBlock *&Header, + bool NoExitBlockPHIs); void moveCodeToFunction(Function *newFunction); diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 2556292177d45..79175bdabeef2 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -43,7 +43,8 @@ static cl::opt namespace { class BlockExtractor { public: - BlockExtractor(bool EraseFunctions, bool KeepOldBlocks=false) : EraseFunctions(EraseFunctions),KeepOldBlocks(KeepOldBlocks) {} + BlockExtractor(bool EraseFunctions, bool KeepOldBlocks = false) + : EraseFunctions(EraseFunctions), 
KeepOldBlocks(KeepOldBlocks) {} bool runOnModule(Module &M); void init(const SmallVectorImpl> &GroupsOfBlocksToExtract) { @@ -91,12 +92,13 @@ class BlockExtractorLegacyPass : public ModulePass { BlockExtractorLegacyPass(const SmallVectorImpl> &GroupsOfBlocksToExtract, bool EraseFunctions, bool KeepOldBlocks) - : ModulePass(ID), BE(EraseFunctions,KeepOldBlocks) { + : ModulePass(ID), BE(EraseFunctions, KeepOldBlocks) { BE.init(GroupsOfBlocksToExtract); } BlockExtractorLegacyPass() - : BlockExtractorLegacyPass(SmallVector(), false, false) {} + : BlockExtractorLegacyPass(SmallVector(), false, false) { + } }; } // end anonymous namespace @@ -109,14 +111,17 @@ ModulePass *llvm::createBlockExtractorPass() { return new BlockExtractorLegacyPass(); } ModulePass *llvm::createBlockExtractorPass( - const SmallVectorImpl &BlocksToExtract, bool EraseFunctions, bool KeepOldBlocks) { - return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions, KeepOldBlocks); + const SmallVectorImpl &BlocksToExtract, bool EraseFunctions, + bool KeepOldBlocks) { + return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions, + KeepOldBlocks); } ModulePass *llvm::createBlockExtractorPass( const SmallVectorImpl> &GroupsOfBlocksToExtract, bool EraseFunctions, bool KeepOldBlocks) { - return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions,KeepOldBlocks); + return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions, + KeepOldBlocks); } /// Gets all of the blocks specified in the input file. 
@@ -224,7 +229,8 @@ bool BlockExtractor::runOnModule(Module &M) { Changed = true; } CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); - Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC, KeepOldBlocks); + Function *F = CodeExtractor(BlocksToExtractVec) + .extractCodeRegion(CEAC, KeepOldBlocks); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() << "' in: " << F->getName() << '\n'); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 87ced7ebd2e10..4ba501459701d 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -61,8 +61,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include #include @@ -252,7 +252,7 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, bool AllowVarArgs, bool AllowAlloca, - std::string Suffix ) + std::string Suffix) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs), Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), @@ -261,7 +261,7 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, - std::string Suffix ) + std::string Suffix) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AC(AC), AllowVarArgs(false), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, @@ -438,7 +438,7 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { } 
// Now add the old exit block to the outline region. Blocks.insert(CommonExitBlock); - //OldTargets.push_back(NewExitBlock); + // OldTargets.push_back(NewExitBlock); return CommonExitBlock; } @@ -659,8 +659,10 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. for (Instruction &II : *BB) { - // Assume should not be the reason to introduce a parameter for the extracted function. - if (isa(II)) continue; + // Assume should not be the reason to introduce a parameter for the + // extracted function. + if (isa(II)) + continue; for (auto &OI : II.operands()) { Value *V = OI; @@ -802,11 +804,9 @@ void CodeExtractor::severSplitPHINodesOfExits() { void CodeExtractor::splitReturnBlocks() { for (BasicBlock *Block : Blocks) - if (ReturnInst* RI = dyn_cast(Block->getTerminator())) { - BasicBlock* New = - Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); - - + if (ReturnInst *RI = dyn_cast(Block->getTerminator())) { + BasicBlock *New = + Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); if (DT) { // Old dominates New. 
New node dominates all other nodes dominated @@ -822,7 +822,7 @@ void CodeExtractor::splitReturnBlocks() { } if (BFI) { - BFI->setBlockFreq(New, BFI->getBlockFreq(Block).getFrequency()); + BFI->setBlockFreq(New, BFI->getBlockFreq(Block).getFrequency()); } if (BPI) { // BPI->getEdgeProbability() @@ -833,212 +833,214 @@ void CodeExtractor::splitReturnBlocks() { /// constructFunction - make a function based on inputs and outputs, as follows: /// f(in0, ..., inN, out0, ..., outN) -Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, const ValueSet &outputs, BasicBlock *header){ - LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); - LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); - - Function *oldFunction = header->getParent(); - Module *M = oldFunction->getParent(); - - // This function returns unsigned, outputs will go back by reference. - switch (NumExitBlocks) { - case 0: - case 1: RetTy = Type::getVoidTy(header->getContext()); break; - case 2: RetTy = Type::getInt1Ty(header->getContext()); break; - default: RetTy = Type::getInt16Ty(header->getContext()); break; - } +Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, + const ValueSet &outputs, + BasicBlock *header) { + LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); + LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); + + Function *oldFunction = header->getParent(); + Module *M = oldFunction->getParent(); + + // This function returns unsigned, outputs will go back by reference. 
+ switch (NumExitBlocks) { + case 0: + case 1: + RetTy = Type::getVoidTy(header->getContext()); + break; + case 2: + RetTy = Type::getInt1Ty(header->getContext()); + break; + default: + RetTy = Type::getInt16Ty(header->getContext()); + break; + } - std::vector paramTy; - SmallVector VMapArg; - // Add the types of the input values to the function's argument list - for (Value *value : inputs) { - LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); - paramTy.push_back(value->getType()); VMapArg.push_back(value); - } + std::vector paramTy; + SmallVector VMapArg; + // Add the types of the input values to the function's argument list + for (Value *value : inputs) { + LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); + paramTy.push_back(value->getType()); + VMapArg.push_back(value); + } - // Add the types of the output values to the function's argument list. - for (Value *output : outputs) { - LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); - if (AggregateArgs) - paramTy.push_back(output->getType()); - else - paramTy.push_back(PointerType::getUnqual(output->getType())); - } + // Add the types of the output values to the function's argument list. 
+ for (Value *output : outputs) { + LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); + if (AggregateArgs) + paramTy.push_back(output->getType()); + else + paramTy.push_back(PointerType::getUnqual(output->getType())); + } - LLVM_DEBUG({ - dbgs() << "Function type: " << *RetTy << " f("; + LLVM_DEBUG({ + dbgs() << "Function type: " << *RetTy << " f("; for (Type *i : paramTy) - dbgs() << *i << ", "; + dbgs() << *i << ", "; dbgs() << ")\n"; - }); - - StructType *StructTy = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - StructTy = StructType::get(M->getContext(), paramTy); - paramTy.clear(); - paramTy.push_back(PointerType::getUnqual(StructTy)); - } - FunctionType *funcType = - FunctionType::get(RetTy, paramTy, - AllowVarArgs && oldFunction->isVarArg()); - - std::string SuffixToUse = - Suffix.empty() - ? (header->getName().empty() ? "extracted" : header->getName().str()) - : Suffix; - // Create the new function - Function *newFunction = Function::Create( - funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), - oldFunction->getName() + "." + SuffixToUse, M); - - // If the old function is no-throw, so is the new one. - if (oldFunction->doesNotThrow()) - newFunction->setDoesNotThrow(); - - // Inherit the uwtable attribute if we need to. - if (oldFunction->hasUWTable()) - newFunction->setHasUWTable(); - - // Propagate personality info to the new function if there is one. - if (oldFunction->hasPersonalityFn()) - newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); - - // Inherit all of the target dependent attributes and white-listed - // target independent attributes. - // (e.g. If the extracted region contains a call to an x86.sse - // instruction we need to make sure that the extracted region has the - // "target-features" attribute allowing it to be lowered. - // FIXME: This should be changed to check to see if a specific - // attribute can not be inherited. 
- for (const auto &Attr : oldFunction->getAttributes().getFnAttrs()) { - if (Attr.isStringAttribute()) { - if (Attr.getKindAsString() == "thunk") - continue; - } else - switch (Attr.getKindAsEnum()) { - // Those attributes cannot be propagated safely. Explicitly list them - // here so we get a warning if new attributes are added. This list also - // includes non-function attributes. - case Attribute::Alignment: - case Attribute::AllocSize: - case Attribute::ArgMemOnly: - case Attribute::Builtin: - case Attribute::ByVal: - case Attribute::Convergent: - case Attribute::Dereferenceable: - case Attribute::DereferenceableOrNull: - case Attribute::ElementType: - case Attribute::InAlloca: - case Attribute::InReg: - case Attribute::InaccessibleMemOnly: - case Attribute::InaccessibleMemOrArgMemOnly: - case Attribute::JumpTable: - case Attribute::Naked: - case Attribute::Nest: - case Attribute::NoAlias: - case Attribute::NoBuiltin: - case Attribute::NoCapture: - case Attribute::NoMerge: - case Attribute::NoReturn: - case Attribute::NoSync: - case Attribute::NoUndef: - case Attribute::None: - case Attribute::NonNull: - case Attribute::Preallocated: - case Attribute::ReadNone: - case Attribute::ReadOnly: - case Attribute::Returned: - case Attribute::ReturnsTwice: - case Attribute::SExt: - case Attribute::Speculatable: - case Attribute::StackAlignment: - case Attribute::StructRet: - case Attribute::SwiftError: - case Attribute::SwiftSelf: - case Attribute::SwiftAsync: - case Attribute::WillReturn: - case Attribute::WriteOnly: - case Attribute::ZExt: - case Attribute::ImmArg: - case Attribute::ByRef: - case Attribute::EndAttrKinds: - case Attribute::EmptyKey: - case Attribute::TombstoneKey: - continue; - // Those attributes should be safe to propagate to the extracted function. 
- case Attribute::AlwaysInline: - case Attribute::Cold: - case Attribute::DisableSanitizerInstrumentation: - case Attribute::Hot: - case Attribute::NoRecurse: - case Attribute::InlineHint: - case Attribute::MinSize: - case Attribute::NoCallback: - case Attribute::NoDuplicate: - case Attribute::NoFree: - case Attribute::NoImplicitFloat: - case Attribute::NoInline: - case Attribute::NonLazyBind: - case Attribute::NoRedZone: - case Attribute::NoUnwind: - case Attribute::NoSanitizeCoverage: - case Attribute::NullPointerIsValid: - case Attribute::OptForFuzzing: - case Attribute::OptimizeNone: - case Attribute::OptimizeForSize: - case Attribute::SafeStack: - case Attribute::ShadowCallStack: - case Attribute::SanitizeAddress: - case Attribute::SanitizeMemory: - case Attribute::SanitizeThread: - case Attribute::SanitizeHWAddress: - case Attribute::SanitizeMemTag: - case Attribute::SpeculativeLoadHardening: - case Attribute::StackProtect: - case Attribute::StackProtectReq: - case Attribute::StackProtectStrong: - case Attribute::StrictFP: - case Attribute::UWTable: - case Attribute::VScaleRange: - case Attribute::NoCfCheck: - case Attribute::MustProgress: - case Attribute::NoProfile: - break; - } - - newFunction->addFnAttr(Attr); - } + }); - // Set swifterror parameter attributes. - if (!AggregateArgs) { - for (auto&& P : enumerate(inputs)) { - if (P.value()->isSwiftError()) - newFunction->addParamAttr(P.index(), Attribute::SwiftError); - } - } + StructType *StructTy = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + StructTy = StructType::get(M->getContext(), paramTy); + paramTy.clear(); + paramTy.push_back(PointerType::getUnqual(StructTy)); + } + FunctionType *funcType = FunctionType::get( + RetTy, paramTy, AllowVarArgs && oldFunction->isVarArg()); + + std::string SuffixToUse = + Suffix.empty() + ? (header->getName().empty() ? 
"extracted" : header->getName().str()) + : Suffix; + // Create the new function + Function *newFunction = Function::Create( + funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), + oldFunction->getName() + "." + SuffixToUse, M); + + // If the old function is no-throw, so is the new one. + if (oldFunction->doesNotThrow()) + newFunction->setDoesNotThrow(); + + // Inherit the uwtable attribute if we need to. + if (oldFunction->hasUWTable()) + newFunction->setHasUWTable(); + + // Propagate personality info to the new function if there is one. + if (oldFunction->hasPersonalityFn()) + newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); + + // Inherit all of the target dependent attributes and white-listed + // target independent attributes. + // (e.g. If the extracted region contains a call to an x86.sse + // instruction we need to make sure that the extracted region has the + // "target-features" attribute allowing it to be lowered. + // FIXME: This should be changed to check to see if a specific + // attribute can not be inherited. + for (const auto &Attr : oldFunction->getAttributes().getFnAttrs()) { + if (Attr.isStringAttribute()) { + if (Attr.getKindAsString() == "thunk") + continue; + } else + switch (Attr.getKindAsEnum()) { + // Those attributes cannot be propagated safely. Explicitly list them + // here so we get a warning if new attributes are added. This list also + // includes non-function attributes. 
+ case Attribute::Alignment: + case Attribute::AllocSize: + case Attribute::ArgMemOnly: + case Attribute::Builtin: + case Attribute::ByVal: + case Attribute::Convergent: + case Attribute::Dereferenceable: + case Attribute::DereferenceableOrNull: + case Attribute::ElementType: + case Attribute::InAlloca: + case Attribute::InReg: + case Attribute::InaccessibleMemOnly: + case Attribute::InaccessibleMemOrArgMemOnly: + case Attribute::JumpTable: + case Attribute::Naked: + case Attribute::Nest: + case Attribute::NoAlias: + case Attribute::NoBuiltin: + case Attribute::NoCapture: + case Attribute::NoMerge: + case Attribute::NoReturn: + case Attribute::NoSync: + case Attribute::NoUndef: + case Attribute::None: + case Attribute::NonNull: + case Attribute::Preallocated: + case Attribute::ReadNone: + case Attribute::ReadOnly: + case Attribute::Returned: + case Attribute::ReturnsTwice: + case Attribute::SExt: + case Attribute::Speculatable: + case Attribute::StackAlignment: + case Attribute::StructRet: + case Attribute::SwiftError: + case Attribute::SwiftSelf: + case Attribute::SwiftAsync: + case Attribute::WillReturn: + case Attribute::WriteOnly: + case Attribute::ZExt: + case Attribute::ImmArg: + case Attribute::ByRef: + case Attribute::EndAttrKinds: + case Attribute::EmptyKey: + case Attribute::TombstoneKey: + continue; + // Those attributes should be safe to propagate to the extracted + // function. 
+ case Attribute::AlwaysInline: + case Attribute::Cold: + case Attribute::DisableSanitizerInstrumentation: + case Attribute::Hot: + case Attribute::NoRecurse: + case Attribute::InlineHint: + case Attribute::MinSize: + case Attribute::NoCallback: + case Attribute::NoDuplicate: + case Attribute::NoFree: + case Attribute::NoImplicitFloat: + case Attribute::NoInline: + case Attribute::NonLazyBind: + case Attribute::NoRedZone: + case Attribute::NoUnwind: + case Attribute::NoSanitizeCoverage: + case Attribute::NullPointerIsValid: + case Attribute::OptForFuzzing: + case Attribute::OptimizeNone: + case Attribute::OptimizeForSize: + case Attribute::SafeStack: + case Attribute::ShadowCallStack: + case Attribute::SanitizeAddress: + case Attribute::SanitizeMemory: + case Attribute::SanitizeThread: + case Attribute::SanitizeHWAddress: + case Attribute::SanitizeMemTag: + case Attribute::SpeculativeLoadHardening: + case Attribute::StackProtect: + case Attribute::StackProtectReq: + case Attribute::StackProtectStrong: + case Attribute::StrictFP: + case Attribute::UWTable: + case Attribute::VScaleRange: + case Attribute::NoCfCheck: + case Attribute::MustProgress: + case Attribute::NoProfile: + break; + } + newFunction->addFnAttr(Attr); + } + // Set swifterror parameter attributes. + if (!AggregateArgs) { + for (auto &&P : enumerate(inputs)) { + if (P.value()->isSwiftError()) + newFunction->addParamAttr(P.index(), Attribute::SwiftError); + } + } // Set names for input and output arguments. 
if (!AggregateArgs) { - Function::arg_iterator AI = newFunction->arg_begin(); + Function::arg_iterator AI = newFunction->arg_begin(); for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) AI->setName(inputs[i]->getName()); for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) AI->setName(outputs[i]->getName()+".out"); } - return newFunction; -} - - - -void CodeExtractor::handleParams( - Function *oldFunction, Function *newFunction, - const ValueSet &inputs, - const ValueSet &outputs) { + return newFunction; } +void CodeExtractor::handleParams(Function *oldFunction, Function *newFunction, + const ValueSet &inputs, + const ValueSet &outputs) {} /// Erase lifetime.start markers which reference inputs to the extraction /// region, and insert the referenced memory into \p LifetimesStart. @@ -1121,31 +1123,22 @@ static void insertLifetimeMarkersSurroundingCall( } } - - - - - - void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function *oldFunc = (*Blocks.begin())->getParent(); Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); - - - auto newFuncIt = newFunction->front().getIterator(); for (BasicBlock *Block : Blocks) { - // Delete the basic block from the old function, and the list of blocks - oldBlocks.remove(Block); - - // Insert this basic block into the new function - // Insert the original blocks after the entry block created - // for the new function. The entry block may be followed - // by a set of exit blocks at this point, but these exit - // blocks better be placed at the end of the new function. - newFuncIt = newBlocks.insertAfter(newFuncIt, Block); + // Delete the basic block from the old function, and the list of blocks + oldBlocks.remove(Block); + + // Insert this basic block into the new function + // Insert the original blocks after the entry block created + // for the new function. 
The entry block may be followed + // by a set of exit blocks at this point, but these exit + // blocks better be placed at the end of the new function. + newFuncIt = newBlocks.insertAfter(newFuncIt, Block); } } @@ -1320,859 +1313,739 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, } Function * -CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, bool KeepOldBlocks ) { +CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + bool KeepOldBlocks) { ValueSet Inputs, Outputs; - return extractCodeRegion(CEAC, Inputs, Outputs,KeepOldBlocks); + return extractCodeRegion(CEAC, Inputs, Outputs, KeepOldBlocks); } - - - - -void CodeExtractor::canonicalizeCFGForExtraction(BasicBlock *&Header,bool NoExitBlockPHIs) { +void CodeExtractor::canonicalizeCFGForExtraction(BasicBlock *&Header, + bool NoExitBlockPHIs) { // BasicBlock *header = *Blocks.begin(); // Function *oldFunction = header->getParent(); - // If we have any return instructions in the region, split those blocks so - // that the return is not in the region. - splitReturnBlocks(); - - // canonicalization - // If we have to split PHI nodes of the entry or exit blocks, do so now. - severSplitPHINodesOfEntry(Header); - - recomputeExitBlocks(); + // If we have any return instructions in the region, split those blocks so + // that the return is not in the region. + splitReturnBlocks(); - severSplitPHINodesOfExits(); - // recomputeExitBlocks(); + // canonicalization + // If we have to split PHI nodes of the entry or exit blocks, do so now. 
+ severSplitPHINodesOfEntry(Header); + recomputeExitBlocks(); - if (NoExitBlockPHIs) { - // TODO: preserve BPI/BFI - for (BasicBlock *Block : Blocks) { - SmallVector Succs; - llvm::append_range(Succs, successors(Block) ); + severSplitPHINodesOfExits(); + // recomputeExitBlocks(); - for (BasicBlock *Succ : Succs) { - if (Blocks.count(Succ)) continue; + if (NoExitBlockPHIs) { + // TODO: preserve BPI/BFI + for (BasicBlock *Block : Blocks) { + SmallVector Succs; + llvm::append_range(Succs, successors(Block)); - if (!Succ->getSinglePredecessor()) { - Succ= SplitEdge(Block, Succ, DT); - } + for (BasicBlock *Succ : Succs) { + if (Blocks.count(Succ)) + continue; - // Ensure no PHI node in exit block (still possible with single predecessor, e.g. LCSSA) - while (auto P = dyn_cast(&Succ->front())) { - assert(P->getNumIncomingValues()==1); - P->replaceAllUsesWith(P->getIncomingValue(0)); - P->eraseFromParent(); - } - } + if (!Succ->getSinglePredecessor()) { + Succ = SplitEdge(Block, Succ, DT); } - - - recomputeExitBlocks(); + // Ensure no PHI node in exit block (still possible with single + // predecessor, e.g. 
LCSSA) + while (auto P = dyn_cast(&Succ->front())) { + assert(P->getNumIncomingValues() == 1); + P->replaceAllUsesWith(P->getIncomingValue(0)); + P->eraseFromParent(); + } + } } - - + recomputeExitBlocks(); + } } - -static void applyFirstDebugLoc(Function *oldFunction, ArrayRef Blocks, Instruction *BranchI) { - if (oldFunction->getSubprogram()) { - any_of(Blocks, [&BranchI](const BasicBlock *BB) { - return any_of(*BB, [&BranchI](const Instruction &I) { - if (!I.getDebugLoc()) - return false; - BranchI->setDebugLoc(I.getDebugLoc()); - return true; - }); - }); - } +static void applyFirstDebugLoc(Function *oldFunction, + ArrayRef Blocks, + Instruction *BranchI) { + if (oldFunction->getSubprogram()) { + any_of(Blocks, [&BranchI](const BasicBlock *BB) { + return any_of(*BB, [&BranchI](const Instruction &I) { + if (!I.getDebugLoc()) + return false; + BranchI->setDebugLoc(I.getDebugLoc()); + return true; + }); + }); + } } - void CodeExtractor::recomputeExitBlocks() { - OldTargets.clear(); - ExitBlocks.clear(); - + OldTargets.clear(); + ExitBlocks.clear(); - for (BasicBlock* Block : Blocks) { - for (BasicBlock* Succ : successors(Block)) { - if (Blocks.count(Succ)) continue; + for (BasicBlock *Block : Blocks) { + for (BasicBlock *Succ : successors(Block)) { + if (Blocks.count(Succ)) + continue; - ExitBlocks.insert(Succ); - OldTargets.push_back(Succ); - } + ExitBlocks.insert(Succ); + OldTargets.push_back(Succ); } - NumExitBlocks = ExitBlocks.size(); + } + NumExitBlocks = ExitBlocks.size(); } - - - Function * -CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, - ValueSet& inputs, ValueSet& outputs, bool KeepOldBlocks) { - if (!isEligible()) - return nullptr; - - - // Assumption: this is a single-entry code region, and the header is the first - // block in the region. 
- BasicBlock* header = *Blocks.begin(); - Function* oldFunction = header->getParent(); - Module* M = oldFunction->getParent(); - LLVMContext& Context = M->getContext(); - const DataLayout& DL = M->getDataLayout(); - - - - - - - - canonicalizeCFGForExtraction(header, KeepOldBlocks); - - - - +CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + ValueSet &inputs, ValueSet &outputs, + bool KeepOldBlocks) { + if (!isEligible()) + return nullptr; - // Calculate the entry frequency of the new function before we change the root - // block. - BlockFrequency EntryFreq; - DenseMap ExitWeights; - if (BFI) { - assert(BPI && "Both BPI and BFI are required to preserve profile info"); - for (BasicBlock* Pred : predecessors(header)) { - if (Blocks.count(Pred)) - continue; - EntryFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); - } + // Assumption: this is a single-entry code region, and the header is the first + // block in the region. + BasicBlock *header = *Blocks.begin(); + Function *oldFunction = header->getParent(); + Module *M = oldFunction->getParent(); + LLVMContext &Context = M->getContext(); + const DataLayout &DL = M->getDataLayout(); + + canonicalizeCFGForExtraction(header, KeepOldBlocks); + + // Calculate the entry frequency of the new function before we change the root + // block. 
+ BlockFrequency EntryFreq; + DenseMap ExitWeights; + if (BFI) { + assert(BPI && "Both BPI and BFI are required to preserve profile info"); + for (BasicBlock *Pred : predecessors(header)) { + if (Blocks.count(Pred)) + continue; + EntryFreq += + BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); + } - for (BasicBlock* Succ : ExitBlocks) { - for (BasicBlock* Block : predecessors(Succ)) { - if (!Blocks.count(Block)) continue; + for (BasicBlock *Succ : ExitBlocks) { + for (BasicBlock *Block : predecessors(Succ)) { + if (!Blocks.count(Block)) + continue; - BlockFrequency& BF = ExitWeights[Succ]; - BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); - } - } + BlockFrequency &BF = ExitWeights[Succ]; + BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); + } } + } - - if (!KeepOldBlocks) { - // Transforms/HotColdSplit/stale-assume-in-original-func.ll - // Remove @llvm.assume calls that will be moved to the new function from the - // old function's assumption cache. - for (BasicBlock* Block : Blocks) { - for (Instruction& I : llvm::make_early_inc_range(*Block)) { - if (auto* AI = dyn_cast(&I)) { - if (AC) - AC->unregisterAssumption(AI); - AI->eraseFromParent(); - } - } + if (!KeepOldBlocks) { + // Transforms/HotColdSplit/stale-assume-in-original-func.ll + // Remove @llvm.assume calls that will be moved to the new function from the + // old function's assumption cache. + for (BasicBlock *Block : Blocks) { + for (Instruction &I : llvm::make_early_inc_range(*Block)) { + if (auto *AI = dyn_cast(&I)) { + if (AC) + AC->unregisterAssumption(AI); + AI->eraseFromParent(); } + } } + } + ValueSet SinkingCands, HoistingCands; + BasicBlock *CommonExit = nullptr; + findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); + assert(HoistingCands.empty() || CommonExit); + // analysis, after ret splitting (for values returned) + // Find inputs to, outputs from the code region. 
+ findInputsOutputs(inputs, outputs, SinkingCands); - ValueSet SinkingCands, HoistingCands; - BasicBlock* CommonExit = nullptr; - findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); - assert(HoistingCands.empty() || CommonExit); - - - - // analysis, after ret splitting (for values returned) - // Find inputs to, outputs from the code region. - findInputsOutputs(inputs, outputs, SinkingCands); - - - - - - - - - - // Construct new function based on inputs/outputs & add allocas for all defs. - Function* newFunction = constructFunctionDeclaration(inputs, outputs, header); - - - - - //// CodeGen newFunction implementation /////////////////////////////////////////////////// - - // The new function needs a root node because other nodes can branch to the - // head of the region, but the entry node of a function cannot have preds. - BasicBlock* newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); - - ValueToValueMapTy VMap; - + // Construct new function based on inputs/outputs & add allocas for all defs. + Function *newFunction = constructFunctionDeclaration(inputs, outputs, header); - SmallVector AdditionalRemap; - auto MoveOrCopyInst = [KeepOldBlocks](Instruction* I, BasicBlock* IB, BasicBlock::iterator IP) -> Instruction* { - if (KeepOldBlocks) { - auto AI = I->clone(); - AI->setName(I->getName()); - IB->getInstList().insert(IP, AI); - return AI; - } - I->moveBefore(*IB, IP); - return I; - }; + //// CodeGen newFunction implementation + ////////////////////////////////////////////////////// + // The new function needs a root node because other nodes can branch to the + // head of the region, but the entry node of a function cannot have preds. + BasicBlock *newFuncRoot = + BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); - // Now sink all instructions which only have non-phi uses inside the region. - // Group the allocas at the start of the block, so that any bitcast uses of - // the allocas are well-defined. 
+ ValueToValueMapTy VMap; - for (auto* II : SinkingCands) { - if (!isa(II)) { - auto New = MoveOrCopyInst(cast(II), newFuncRoot, newFuncRoot->getFirstInsertionPt()); - if (KeepOldBlocks) { - AdditionalRemap.push_back(New); - VMap[II] = New; - } - } - } - for (auto* II : SinkingCands) { - if (auto* AI = dyn_cast(II)) { - AI = cast(MoveOrCopyInst(AI, newFuncRoot, newFuncRoot->getFirstInsertionPt())); - if (KeepOldBlocks) { - AdditionalRemap.push_back(AI); - VMap[II] = AI; - } - } + SmallVector AdditionalRemap; + auto MoveOrCopyInst = + [KeepOldBlocks](Instruction *I, BasicBlock *IB, + BasicBlock::iterator IP) -> Instruction * { + if (KeepOldBlocks) { + auto AI = I->clone(); + AI->setName(I->getName()); + IB->getInstList().insert(IP, AI); + return AI; } + I->moveBefore(*IB, IP); + return I; + }; - - - if (!HoistingCands.empty()) { - auto* HoistToBlock = findOrCreateBlockForHoisting(CommonExit); - Instruction* TI = HoistToBlock->getTerminator(); - for (auto* II : HoistingCands) { - // MoveOrCopyInst(cast(II), HoistToBlock, TI->getIterator()); - cast(II)->moveBefore(TI); - } - recomputeExitBlocks(); + // Now sink all instructions which only have non-phi uses inside the region. + // Group the allocas at the start of the block, so that any bitcast uses of + // the allocas are well-defined. 
+ + for (auto *II : SinkingCands) { + if (!isa(II)) { + auto New = MoveOrCopyInst(cast(II), newFuncRoot, + newFuncRoot->getFirstInsertionPt()); + if (KeepOldBlocks) { + AdditionalRemap.push_back(New); + VMap[II] = New; + } } - - - std::map ExitBlockMap; - SmallDenseMap ExitBlockSwitchIdx; - SmallVector Orlder; - - for (BasicBlock* OldTarget : OldTargets) { - if (Blocks.count(OldTarget)) - continue; - - auto Added = ExitBlockSwitchIdx.insert({ OldTarget, ExitBlockSwitchIdx.size() }); - if (Added.second) - Orlder.push_back(OldTarget); + } + for (auto *II : SinkingCands) { + if (auto *AI = dyn_cast(II)) { + AI = cast( + MoveOrCopyInst(AI, newFuncRoot, newFuncRoot->getFirstInsertionPt())); + if (KeepOldBlocks) { + AdditionalRemap.push_back(AI); + VMap[II] = AI; + } } + } + if (!HoistingCands.empty()) { + auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + Instruction *TI = HoistToBlock->getTerminator(); + for (auto *II : HoistingCands) { + // MoveOrCopyInst(cast(II), HoistToBlock, + // TI->getIterator()); + cast(II)->moveBefore(TI); + } + recomputeExitBlocks(); + } + std::map ExitBlockMap; + SmallDenseMap ExitBlockSwitchIdx; + SmallVector Orlder; + for (BasicBlock *OldTarget : OldTargets) { + if (Blocks.count(OldTarget)) + continue; + auto Added = + ExitBlockSwitchIdx.insert({OldTarget, ExitBlockSwitchIdx.size()}); + if (Added.second) + Orlder.push_back(OldTarget); + } + // Collect objects which are inputs to the extraction region and also + // referenced by lifetime start markers within it. The effects of these + // markers must be replicated in the calling function to prevent the stack + // coloring pass from merging slots which store input objects. 
+ ValueSet LifetimesStart; + eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); + + StructType *StructTy = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) + StructTy = cast(newFunction->getArg(0)->getType()); + + // Create an iterator to name all of the arguments we inserted. + Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. + SmallVector NewValues; + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); + Instruction *TI = newFunction->begin()->getTerminator(); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, + "loadgep_" + inputs[i]->getName(), TI); + } else + RewriteVal = &*AI++; + + NewValues.push_back(RewriteVal); + } - // Collect objects which are inputs to the extraction region and also - // referenced by lifetime start markers within it. The effects of these - // markers must be replicated in the calling function to prevent the stack - // coloring pass from merging slots which store input objects. 
- ValueSet LifetimesStart; - eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); - - - - - - + for (auto &&P : enumerate(inputs)) { + VMap[P.value()] = NewValues[P.index()]; + } + //// Copy/Move code + /////////////////////////////////////////////////////////////////////////////// - StructType* StructTy = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) - StructTy = cast(newFunction->getArg(0)->getType()); + // Determine position for the replacement code + auto ReplIP = header; + if (!KeepOldBlocks) { + while (ReplIP && Blocks.count(ReplIP)) { + ReplIP = ReplIP->getNextNode(); + } + } + if (KeepOldBlocks) { + for (BasicBlock *Block : Blocks) { + BasicBlock *CBB = CloneBasicBlock(Block, VMap, {}, + newFunction /*, nullptr, &DIFinder*/); + // Add basic block mapping. + VMap[Block] = CBB; - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (Block->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(oldFunction, Block); + VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); + } + // Note return instructions for the caller. + // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) + // Returns.push_back(RI); - // Rewrite all users of the inputs in the extracted region to use the - // arguments (or appropriate addressing into struct) instead. 
- SmallVector NewValues; - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value* RewriteVal; - if (AggregateArgs) { - Value* Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); - Instruction* TI = newFunction->begin()->getTerminator(); - GetElementPtrInst* GEP = GetElementPtrInst::Create(StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, "loadgep_" + inputs[i]->getName(), TI); + for (auto &&P : CBB->phis()) { + auto NumIncoming = P.getNumIncomingValues(); + for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { + if (Blocks.count(P.getIncomingBlock(Idx))) + continue; + P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/false); } - else - RewriteVal = &*AI++; - - NewValues.push_back(RewriteVal); + } } - for (auto&& P : enumerate(inputs)) { - VMap[P.value()] = NewValues[P.index()]; + for (auto Pred : predecessors(header)) { + if (VMap.count(Pred)) + continue; + VMap[Pred] = newFuncRoot; } + } else { + moveCodeToFunction(newFunction); - - - - //// Copy/Move code //////////////////////////////////////////////////////////////////////////// - - // Determine position for the replacement code - auto ReplIP = header; if (!KeepOldBlocks) { - while (ReplIP && Blocks.count(ReplIP)) { - ReplIP = ReplIP->getNextNode(); - } + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal = NewValues[i]; + + std::vector Users(inputs[i]->user_begin(), + inputs[i]->user_end()); + for (User *use : Users) + if (Instruction *inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } } + } + for (auto OldTarget : OldTargets) { + BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; + if (NewTarget) + continue; - if (KeepOldBlocks) { - - for (BasicBlock* Block : Blocks) { - BasicBlock* CBB = CloneBasicBlock(Block, VMap, {}, newFunction /*, nullptr, 
&DIFinder*/); - - // Add basic block mapping. - VMap[Block] = CBB; - - // It is only legal to clone a function if a block address within that - // function is never referenced outside of the function. Given that, we - // want to map block addresses from the old function to block addresses in - // the clone. (This is different from the generic ValueMapper - // implementation, which generates an invalid blockaddress when - // cloning a function.) - if (Block->hasAddressTaken()) { - Constant* OldBBAddr = BlockAddress::get(oldFunction, Block); - VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); - } - - // Note return instructions for the caller. - // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) - // Returns.push_back(RI); - - - for (auto&& P : CBB->phis()) { - auto NumIncoming = P.getNumIncomingValues(); - for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { - if (Blocks.count(P.getIncomingBlock(Idx))) - continue; - P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/ false); - } - } - } - - - - for (auto Pred : predecessors(header)) { - if (VMap.count(Pred)) - continue; - VMap[Pred] = newFuncRoot; - } - - - } - else { - moveCodeToFunction(newFunction); + // If we don't already have an exit stub for this non-extracted + // destination, create one now! 
+ NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", + newFunction); + VMap[OldTarget] = NewTarget; - if (!KeepOldBlocks) { - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value* RewriteVal = NewValues[i]; + auto SuccNum = ExitBlockSwitchIdx[OldTarget]; - std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User* use : Users) - if (Instruction* inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } - } + auto &Context = Blocks.front()->getContext(); + Value *brVal = nullptr; + assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); + switch (NumExitBlocks) { + case 0: + case 1: + break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; } - for (auto OldTarget : OldTargets) { - BasicBlock*& NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) - continue; - - // If we don't already have an exit stub for this non-extracted - // destination, create one now! 
- NewTarget = BasicBlock::Create(Context, - OldTarget->getName() + ".exitStub", - newFunction); + ReturnInst::Create(Context, brVal, NewTarget); + } + for (BasicBlock *Block : Blocks) { + Instruction *TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; + BasicBlock *OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock *NewTarget = ExitBlockMap[OldTarget]; + assert(NewTarget && "Unknown target block!"); + + if (!KeepOldBlocks) { + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } else { VMap[OldTarget] = NewTarget; - - - auto SuccNum = ExitBlockSwitchIdx[OldTarget]; - - - auto& Context = Blocks.front()->getContext(); - Value* brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: break; // No value needed. - case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); - break; - } - - - ReturnInst::Create(Context, brVal, NewTarget); + } } + } + if (KeepOldBlocks) { + for (Instruction *II : AdditionalRemap) + RemapInstruction(II, VMap, RF_NoModuleLevelChanges); + // Loop over all of the instructions in the new function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (BasicBlock *Block : Blocks) { + WeakTrackingVH NewBlock = VMap.lookup(Block); + if (!NewBlock) { + continue; + } + BasicBlock &Y = cast(*NewBlock); + // Loop over all instructions, fixing each one as we find it... 
- for (BasicBlock* Block : Blocks) { - Instruction* TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock* OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock* NewTarget = ExitBlockMap[OldTarget]; - assert(NewTarget && "Unknown target block!"); - - if (!KeepOldBlocks) { - // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); - } - else { - VMap[OldTarget] = NewTarget; - } - } + for (Instruction &II : Y) + RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); } - - - - - - if (KeepOldBlocks) { - for (Instruction* II : AdditionalRemap) - RemapInstruction(II, VMap, RF_NoModuleLevelChanges); - - // Loop over all of the instructions in the new function, fixing up operand - // references as we go. This uses VMap to do all the hard work. - for (BasicBlock* Block : Blocks) { - WeakTrackingVH NewBlock = VMap.lookup(Block); - if (!NewBlock) { - continue; - } - BasicBlock& Y = cast(*NewBlock); - - // Loop over all instructions, fixing each one as we find it... - - for (Instruction& II : Y) - RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); - } - }else{ - // Loop over all of the PHI nodes in the header and exit blocks, and change - // any references to the old incoming edge to be the new incoming edge. - for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { - PHINode* PN = cast(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!Blocks.count(PN->getIncomingBlock(i))) - PN->setIncomingBlock(i, newFuncRoot); - } - - + } else { + // Loop over all of the PHI nodes in the header and exit blocks, and change + // any references to the old incoming edge to be the new incoming edge. 
+ for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!Blocks.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); } + } + auto NewHeader = header; + if (KeepOldBlocks) + NewHeader = cast(VMap.lookup(NewHeader)); + assert(NewHeader); + auto *BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); + + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); + + // Store the arguments right after the definition of output value. + // This should be proceeded after creating exit stubs to be ensure that invoke + // result restore will be placed in the outlined function. + Function::arg_iterator OAI = OutputArgBegin; + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto *OutI = dyn_cast(outputs[i]); + if (!OutI) + continue; - - auto NewHeader=header; if (KeepOldBlocks) - NewHeader = cast(VMap.lookup(NewHeader)); - assert(NewHeader); - auto* BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); - - - - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); - - - - // Store the arguments right after the definition of output value. - // This should be proceeded after creating exit stubs to be ensure that invoke - // result restore will be placed in the outlined function. - Function::arg_iterator OAI = OutputArgBegin; - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto* OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; - - if (KeepOldBlocks) - OutI = cast(VMap.lookup(OutI)); - - // Find proper insertion point. 
- BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. Otherwise we insert the store right after OutI. - if (auto* InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto* Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); - - Instruction* InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || - Blocks.count(InsertBefore->getParent())) && - "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value* Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst* GEP = GetElementPtrInst::Create( - StructTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), - InsertBefore); - new StoreInst(OutI, GEP, InsertBefore); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. - } - else { - new StoreInst(OutI, &*OAI, InsertBefore); - ++OAI; - } - } - - - - - //// Codegen newFunction call replacement ////////////////////////////////////////////// - - // This takes place of the original loop - BasicBlock* codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, ReplIP); - BasicBlock* AllocaBlock = &oldFunction->front(); - - - - - - // Update the entry count of the function. 
- if (BFI) { - auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (Count.hasValue()) - newFunction->setEntryCount(ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME - BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); + OutI = cast(VMap.lookup(OutI)); + + // Find proper insertion point. + BasicBlock::iterator InsertPt; + // In case OutI is an invoke, we insert the store at the beginning in the + // 'normal destination' BB. Otherwise we insert the store right after OutI. + if (auto *InvokeI = dyn_cast(OutI)) + InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); + else if (auto *Phi = dyn_cast(OutI)) + InsertPt = Phi->getParent()->getFirstInsertionPt(); + else + InsertPt = std::next(OutI->getIterator()); + + Instruction *InsertBefore = &*InsertPt; + assert((InsertBefore->getFunction() == newFunction || + Blocks.count(InsertBefore->getParent())) && + "InsertPt should be in new function"); + assert(OAI != newFunction->arg_end() && + "Number of output arguments should match " + "the amount of defined values"); + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertBefore); + new StoreInst(OutI, GEP, InsertBefore); + // Since there should be only one struct argument aggregating + // all the output values, we shouldn't increment OAI, which always + // points to the struct argument, in this case. 
+ } else { + new StoreInst(OutI, &*OAI, InsertBefore); + ++OAI; } + } + //// Codegen newFunction call replacement + ///////////////////////////////////////////////// + + // This takes place of the original loop + BasicBlock *codeReplacer = + BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, ReplIP); + BasicBlock *AllocaBlock = &oldFunction->front(); + + // Update the entry count of the function. + if (BFI) { + auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (Count.hasValue()) + newFunction->setEntryCount( + ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME + BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); + } + // Add inputs as params, or to be filled into the struct + unsigned ArgNo = 0; + std::vector params; - - - // Add inputs as params, or to be filled into the struct - unsigned ArgNo = 0; - std::vector params; - - AllocaInst* Struct = nullptr; - if (AggregateArgs && StructTy) { - std::vector StructValues; - for (Value* input : inputs) { - StructValues.push_back(input); - ++ArgNo; - } - - - - Struct = new AllocaInst(StructTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", - &AllocaBlock->front()); - - params.push_back(Struct); - - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value* Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst* GEP = GetElementPtrInst::Create(StructTy, Struct, Idx, "gep_" + StructValues[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); - } + AllocaInst *Struct = nullptr; + if (AggregateArgs && StructTy) { + std::vector StructValues; + for (Value *input : inputs) { + StructValues.push_back(input); + ++ArgNo; } + Struct = new AllocaInst(StructTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", &AllocaBlock->front()); + params.push_back(Struct); - - - - std::vector ReloadOutputs; - std::vector Reloads; - 
if (!AggregateArgs) { - for (Value* input : inputs) { - params.push_back(input); - } - - - - - // Create allocas for the outputs - for (Value* output : outputs) { - AllocaInst* alloca = - new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), - nullptr, output->getName() + ".loc", - &AllocaBlock->front()); - ReloadOutputs.push_back(alloca); - params.push_back(alloca); - } + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + new StoreInst(StructValues[i], GEP, codeReplacer); } + } - - - - - - - // Emit the call to the function - CallInst* call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : "", codeReplacer); - - - // Set swifterror parameter attributes. - if (!AggregateArgs) { - for (auto&& P : enumerate(inputs)) { - if (P.value()->isSwiftError()) - call->addParamAttr(P.index(), Attribute::SwiftError); - } + std::vector ReloadOutputs; + std::vector Reloads; + if (!AggregateArgs) { + for (Value *input : inputs) { + params.push_back(input); } - - - // Add debug location to the new call, if the original function has debug - // info. In that case, the terminator of the entry block of the extracted - // function contains the first debug location of the extracted function, - // set in extractCodeRegion. 
- if (oldFunction->getSubprogram()) { - if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) - call->setDebugLoc(DL); + // Create allocas for the outputs + for (Value *output : outputs) { + AllocaInst *alloca = + new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), nullptr, + output->getName() + ".loc", &AllocaBlock->front()); + ReloadOutputs.push_back(alloca); + params.push_back(alloca); } + } + // Emit the call to the function + CallInst *call = + CallInst::Create(newFunction, params, + NumExitBlocks > 1 ? "targetBlock" : "", codeReplacer); - - - // Reload the outputs passed in by reference. - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value* Output = nullptr; - if (AggregateArgs) { - Value* Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst* GEP = GetElementPtrInst::Create(StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } else { - Output = ReloadOutputs[i]; - } - LoadInst* load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", codeReplacer); - Reloads.push_back(load); + // Set swifterror parameter attributes. + if (!AggregateArgs) { + for (auto &&P : enumerate(inputs)) { + if (P.value()->isSwiftError()) + call->addParamAttr(P.index(), Attribute::SwiftError); } + } + // Add debug location to the new call, if the original function has debug + // info. In that case, the terminator of the entry block of the extracted + // function contains the first debug location of the extracted function, + // set in extractCodeRegion. + if (oldFunction->getSubprogram()) { + if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) + call->setDebugLoc(DL); + } + // Reload the outputs passed in by reference. 
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *Output = nullptr; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } else { + Output = ReloadOutputs[i]; + } + LoadInst *load = + new LoadInst(outputs[i]->getType(), Output, + outputs[i]->getName() + ".reload", codeReplacer); + Reloads.push_back(load); + } - // Now we can emit a switch statement using the call as a value. - SwitchInst* TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); - - - for (auto&& P : Orlder) { - auto OldTarget = P; - auto SuccNum = ExitBlockSwitchIdx[OldTarget]; - - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), - SuccNum), - OldTarget); - } + // Now we can emit a switch statement using the call as a value. + SwitchInst *TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + for (auto &&P : Orlder) { + auto OldTarget = P; + auto SuccNum = ExitBlockSwitchIdx[OldTarget]; + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), SuccNum), + OldTarget); + } + // Now that we've done the deed, simplify the switch instruction. 
+ Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy->isVoidTy()) { + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + } else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. + ReturnInst::Create(Context, Constant::getNullValue(OldFnRetTy), + TheSwitch); + } + + TheSwitch->eraseFromParent(); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->eraseFromParent(); + break; + case 2: + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->eraseFromParent(); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. + TheSwitch->setCondition(call); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks - 1)); + break; + } - // Now that we've done the deed, simplify the switch instruction. - Type* OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); - switch (NumExitBlocks) { - case 0: - // There are no successors (the block containing the switch itself), which - // means that previously this was the last part of the function, and hence - // this should be rewritten as a `ret' + // Insert lifetime markers around the reloads of any output values. 
The + // allocas output values are stored in are only in-use in the codeRepl block. + insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); + + // Replicate the effects of any lifetime start/end markers which referenced + // input objects in the extraction region by placing markers around the call. + insertLifetimeMarkersSurroundingCall(M, LifetimesStart.getArrayRef(), {}, + call); + + //// Connect call replacement to CFG + /////////////////////////////////////////////////////////////////////////// + + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector Users(header->user_begin(), header->user_end()); + for (auto &U : Users) // FIXME: KeepOldBlocks? + // The BasicBlock which contains the branch is not in + // the region modify the branch target to a new block + if (Instruction *I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction) + I->replaceUsesOfWith(header, codeReplacer); + + if (!KeepOldBlocks) { + for (BasicBlock *ExitBB : ExitBlocks) + for (PHINode &PN : ExitBB->phis()) { + Value *IncomingCodeReplacerVal = nullptr; + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // Ignore incoming values from outside of the extracted region. + if (!Blocks.count(PN.getIncomingBlock(i))) + continue; - // Check if the function should return a value - if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, nullptr, TheSwitch); // Return void - } - else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { - // return what we have - ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); - } - else { - // Otherwise we must have code extracted an unwind or something, just - // return whatever we want. 
- ReturnInst::Create(Context, - Constant::getNullValue(OldFnRetTy), TheSwitch); + // Ensure that there is only one incoming value from codeReplacer. + if (!IncomingCodeReplacerVal) { + PN.setIncomingBlock(i, codeReplacer); + IncomingCodeReplacerVal = PN.getIncomingValue(i); + } else + assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && + "PHI has two incompatbile incoming values from codeRepl"); } + } + } - TheSwitch->eraseFromParent(); - break; - case 1: - // Only a single destination, change the switch into an unconditional - // branch. - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); - TheSwitch->eraseFromParent(); - break; - case 2: - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), - call, TheSwitch); - TheSwitch->eraseFromParent(); - break; - default: - // Otherwise, make the default destination of the switch instruction be one - // of the other successors. - TheSwitch->setCondition(call); - TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); - // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks - 1)); - break; - } - - - // Insert lifetime markers around the reloads of any output values. The - // allocas output values are stored in are only in-use in the codeRepl block. - insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); - - - - // Replicate the effects of any lifetime start/end markers which referenced - // input objects in the extraction region by placing markers around the call. 
- insertLifetimeMarkersSurroundingCall(M, LifetimesStart.getArrayRef(), {}, call); - - - - - - //// Connect call replacement to CFG //////////////////////////////////////////////////////////////////////// - - + if (KeepOldBlocks) { + // Must be done after remap + SSAUpdater SSA; + for (auto P : enumerate(outputs)) { + auto OutIdx = P.index(); + auto OldVal = cast(P.value()); + auto NewVal = Reloads[OutIdx]; + + SSA.Initialize(OldVal->getType(), + (OldVal->getName() + ".merge_with_extracted").str()); + SSA.AddAvailableValue(codeReplacer, NewVal); + + // Could help SSAUpdater by determining in advance which output values are + // available in which exit blocks (from DT). + SSA.AddAvailableValue(OldVal->getParent(), OldVal); + + for (auto &&U : make_early_inc_range(OldVal->uses())) { + auto User = dyn_cast(U.getUser()); + if (!User) + continue; + auto EffectiveUser = User->getParent(); + if (auto &&P = dyn_cast(User)) { + EffectiveUser = P->getIncomingBlock(U); + } - // Rewrite branches to basic blocks outside of the loop to new dummy blocks - // within the new function. This must be done before we lose track of which - // blocks were originally in the code region. - std::vector Users(header->user_begin(), header->user_end()); - for (auto& U : Users) // FIXME: KeepOldBlocks? - // The BasicBlock which contains the branch is not in the region - // modify the branch target to a new block - if (Instruction* I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction) - I->replaceUsesOfWith(header, codeReplacer); + if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) + continue; - if (!KeepOldBlocks) { - for (BasicBlock* ExitBB : ExitBlocks) - for (PHINode& PN : ExitBB->phis()) { - Value* IncomingCodeReplacerVal = nullptr; - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - // Ignore incoming values from outside of the extracted region. 
- if (!Blocks.count(PN.getIncomingBlock(i))) - continue; - - // Ensure that there is only one incoming value from codeReplacer. - if (!IncomingCodeReplacerVal) { - PN.setIncomingBlock(i, codeReplacer); - IncomingCodeReplacerVal = PN.getIncomingValue(i); - } else - assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && - "PHI has two incompatbile incoming values from codeRepl"); - } - } + SSA.RewriteUseAfterInsertions(U); + } } - - - - if (KeepOldBlocks) { - // Must be done after remap - SSAUpdater SSA; - for (auto P : enumerate(outputs)) { - auto OutIdx = P.index(); - auto OldVal = cast(P.value()); - auto NewVal = Reloads[OutIdx]; - - SSA.Initialize(OldVal->getType(), (OldVal->getName() + ".merge_with_extracted").str()); - SSA.AddAvailableValue(codeReplacer, NewVal); - - // Could help SSAUpdater by determining in advance which output values are available in which exit blocks (from DT). - SSA.AddAvailableValue(OldVal->getParent(), OldVal); - - for (auto&& U : make_early_inc_range(OldVal->uses())) { - auto User = dyn_cast(U.getUser()); - if (!User) continue; - auto EffectiveUser = User->getParent(); - if (auto&& P = dyn_cast(User)) { - EffectiveUser = P->getIncomingBlock(U); - } - - if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) continue; - - - SSA.RewriteUseAfterInsertions(U); - } - } - } else { - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto load = Reloads[i]; - - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction* inst = cast(Users[u]); - if (inst->getParent()->getParent() == oldFunction) - inst->replaceUsesOfWith(outputs[i], load); - } - } + } else { + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto load = Reloads[i]; + + std::vector Users(outputs[i]->user_begin(), + outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction *inst = cast(Users[u]); + if (inst->getParent()->getParent() 
== oldFunction) + inst->replaceUsesOfWith(outputs[i], load); + } } + } + // Update the branch weights for the exit block. + if (BFI && NumExitBlocks > 1) + calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); + fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *call); - - // Update the branch weights for the exit block. - if (BFI && NumExitBlocks > 1) - calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); - - - - fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *call); - - // Mark the new function `noreturn` if applicable. Terminators which resume - // exception propagation are treated as returning instructions. This is to - // avoid inserting traps after calls to outlined functions which unwind. - bool doesNotReturn = none_of(*newFunction, [](const BasicBlock& BB) { - const Instruction* Term = BB.getTerminator(); - return isa(Term) || isa(Term); - }); - if (doesNotReturn) - newFunction->setDoesNotReturn(); - + // Mark the new function `noreturn` if applicable. Terminators which resume + // exception propagation are treated as returning instructions. This is to + // avoid inserting traps after calls to outlined functions which unwind. 
+ bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) { + const Instruction *Term = BB.getTerminator(); + return isa(Term) || isa(Term); + }); + if (doesNotReturn) + newFunction->setDoesNotReturn(); LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) { newFunction->dump(); @@ -2185,10 +2058,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache& CEAC, return newFunction; } - - - - bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc, const Function &NewFunc, AssumptionCache *AC) { diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index 189513123477b..44f96bdd85a73 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -84,7 +84,8 @@ static cl::list ExtractBlocks( "Specify pairs to extract.\n" "Each pair will create a function.\n" "If multiple basic blocks are specified in one pair,\n" - "the first block in the sequence should dominate the rest (unless using --bb-keep-blocks)." + "the first block in the sequence should dominate the rest (unless " + "using --bb-keep-blocks)." 
"eg:\n" " --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n" " --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one " @@ -92,16 +93,18 @@ static cl::list ExtractBlocks( cl::ZeroOrMore, cl::value_desc("function:bb1[;bb2...]"), cl::cat(ExtractCat)); -static cl::opt KeepFunctions("bb-keep-functions", - cl::desc("When extracting blocks from functions, keep the original functions; extracted code is replaced by function call to new function"), - cl::cat(ExtractCat) - ); - -static cl::opt KeepBlocks("bb-keep-blocks", - cl::desc("Keep extracted blocks in original function after outlining, likely orphaned."), - cl::cat(ExtractCat) - ); +static cl::opt KeepFunctions( + "bb-keep-functions", + cl::desc( + "When extracting blocks from functions, keep the original functions; " + "extracted code is replaced by function call to new function"), + cl::cat(ExtractCat)); +static cl::opt + KeepBlocks("bb-keep-blocks", + cl::desc("Keep extracted blocks in original function after " + "outlining, likely orphaned."), + cl::cat(ExtractCat)); // ExtractAlias - The alias to extract from the module. static cl::list From 3531ac571e77169688ad8b3ff7fa725f2b2e149f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 05:00:38 -0600 Subject: [PATCH 071/130] eligability check --- .../llvm/Transforms/Utils/CodeExtractor.h | 28 +++++++++------ llvm/lib/Transforms/IPO/BlockExtractor.cpp | 4 +-- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 35 +++++++++---------- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 0e6cda4bd4ab9..8740a6dee7ea5 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -98,6 +98,8 @@ class CodeExtractorAnalysisCache { // If true, varargs functions can be extracted. 
bool AllowVarArgs; + bool KeepOldBlocks; + // Bits of intermediate state computed at various phases of extraction. SetVector Blocks; unsigned NumExitBlocks = std::numeric_limits::max(); @@ -113,7 +115,7 @@ class CodeExtractorAnalysisCache { // label, if non-empty, otherwise "extracted". std::string Suffix; - void recomputeExitBlocks(); + public: /// Create a code extractor for a sequence of blocks. @@ -130,8 +132,8 @@ class CodeExtractorAnalysisCache { bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr, AssumptionCache *AC = nullptr, - bool AllowVarArgs = false, bool AllowAlloca = false, - std::string Suffix = ""); + bool AllowVarArgs = false, bool AllowAlloca = false, + std::string Suffix = "", bool KeepOldBlocks= false); /// Create a code extractor for a loop body. /// @@ -147,8 +149,7 @@ class CodeExtractorAnalysisCache { /// /// Returns zero when called on a CodeExtractor instance where isEligible /// returns false. - Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - bool KeepOldBlocks = false); + Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC); /// Perform the extraction, returning the new function and providing an /// interface to see what was categorized as inputs and outputs. @@ -164,8 +165,7 @@ class CodeExtractorAnalysisCache { /// copied. \returns zero when called on a CodeExtractor instance where /// isEligible returns false. Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - ValueSet &Inputs, ValueSet &Outputs, - bool KeepOldBlocks = false); + ValueSet &Inputs, ValueSet &Outputs); /// Verify that assumption cache isn't stale after a region is extracted. /// Returns true when verifier finds errors. 
AssumptionCache is passed as @@ -236,19 +236,25 @@ class CodeExtractorAnalysisCache { getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, Instruction *Addr, BasicBlock *ExitBlock) const; + + + + void recomputeExitBlocks(); + void severSplitPHINodesOfEntry(BasicBlock *&Header); void severSplitPHINodesOfExits(); void splitReturnBlocks(); - void handleParams(Function *oldFunction, Function *newFunction, - const ValueSet &inputs, const ValueSet &outputs); + + + void canonicalizeCFGForExtraction(BasicBlock *&Header, + bool NoExitBlockPHIs); Function *constructFunctionDeclaration(const ValueSet &inputs, const ValueSet &outputs, BasicBlock *header); - void canonicalizeCFGForExtraction(BasicBlock *&Header, - bool NoExitBlockPHIs); + void moveCodeToFunction(Function *newFunction); diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 79175bdabeef2..79cc8e8c80e4e 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -229,8 +229,8 @@ bool BlockExtractor::runOnModule(Module &M) { Changed = true; } CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); - Function *F = CodeExtractor(BlocksToExtractVec) - .extractCodeRegion(CEAC, KeepOldBlocks); + Function *F = CodeExtractor(BlocksToExtractVec,/* DT */ nullptr,/* AggregateArgs*/ false, /* BFI */ nullptr,/* BPI */ nullptr, /* AC */nullptr,/* AllowVarArgs */ false,/* AllowAlloca */ false, /* Suffix */ "", KeepOldBlocks) + .extractCodeRegion(CEAC); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() << "' in: " << F->getName() << '\n'); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 4ba501459701d..0a6e72ec53f7e 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -199,7 +199,7 @@ static bool isBlockValidForExtraction(const BasicBlock &BB, /// Build a set of blocks to extract if 
the input blocks are viable. static SetVector buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, - bool AllowVarArgs, bool AllowAlloca) { + bool AllowVarArgs, bool AllowAlloca, bool KeepOldBlocks) { assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector Result; @@ -230,11 +230,12 @@ buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, continue; } -#if 0 + +if (!KeepOldBlocks) { // All blocks other than the first must not have predecessors outside of // the subgraph which is being extracted. for (auto *PBB : predecessors(BB)) - if (!Result.count(PBB) && DT->isReachableFromEntry(PBB) { + if (!Result.count(PBB)) { LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from " "outside the region except for the first block!\n" << "Problematic source BB: " << BB->getName() << "\n" @@ -242,7 +243,7 @@ buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, << "\n"); return {}; } -#endif +} } return Result; @@ -252,10 +253,10 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, bool AllowVarArgs, bool AllowAlloca, - std::string Suffix) + std::string Suffix, bool KeepOldBlocks) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs), - Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), + BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs), KeepOldBlocks(KeepOldBlocks), + Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca, KeepOldBlocks)), Suffix(Suffix) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, @@ -263,10 +264,10 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BranchProbabilityInfo *BPI, AssumptionCache *AC, std::string Suffix) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllowVarArgs(false), + BPI(BPI), AC(AC), 
AllowVarArgs(false),KeepOldBlocks(false), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, /* AllowVarArgs */ false, - /* AllowAlloca */ false)), + /* AllowAlloca */ false, /* KeepOldBlocks */ false)), Suffix(Suffix) {} /// definedInRegion - Return true if the specified value is defined in the @@ -1038,9 +1039,7 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, return newFunction; } -void CodeExtractor::handleParams(Function *oldFunction, Function *newFunction, - const ValueSet &inputs, - const ValueSet &outputs) {} + /// Erase lifetime.start markers which reference inputs to the extraction /// region, and insert the referenced memory into \p LifetimesStart. @@ -1313,10 +1312,9 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, } Function * -CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - bool KeepOldBlocks) { +CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { ValueSet Inputs, Outputs; - return extractCodeRegion(CEAC, Inputs, Outputs, KeepOldBlocks); + return extractCodeRegion(CEAC, Inputs, Outputs); } void CodeExtractor::canonicalizeCFGForExtraction(BasicBlock *&Header, @@ -1335,7 +1333,7 @@ void CodeExtractor::canonicalizeCFGForExtraction(BasicBlock *&Header, recomputeExitBlocks(); severSplitPHINodesOfExits(); - // recomputeExitBlocks(); + if (NoExitBlockPHIs) { // TODO: preserve BPI/BFI @@ -1398,8 +1396,7 @@ void CodeExtractor::recomputeExitBlocks() { Function * CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - ValueSet &inputs, ValueSet &outputs, - bool KeepOldBlocks) { + ValueSet &inputs, ValueSet &outputs) { if (!isEligible()) return nullptr; @@ -1476,7 +1473,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, SmallVector AdditionalRemap; auto MoveOrCopyInst = - [KeepOldBlocks](Instruction *I, BasicBlock *IB, + [this](Instruction *I, BasicBlock *IB, BasicBlock::iterator IP) -> Instruction * { if 
(KeepOldBlocks) { auto AI = I->clone(); From 471cccdcba5d3e077ac480e2dda66621c047ab36 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 05:26:43 -0600 Subject: [PATCH 072/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 142 ++++++++------------ 1 file changed, 55 insertions(+), 87 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 0a6e72ec53f7e..cda42f01f65ef 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -439,7 +439,6 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { } // Now add the old exit block to the outline region. Blocks.insert(CommonExitBlock); - // OldTargets.push_back(NewExitBlock); return CommonExitBlock; } @@ -500,11 +499,6 @@ CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC, ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const { - if (Blocks.empty()) { - // ?? - return; - } - Function *Func = (*Blocks.begin())->getParent(); ExitBlock = getCommonExitBlock(Blocks); @@ -660,8 +654,7 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. for (Instruction &II : *BB) { - // Assume should not be the reason to introduce a parameter for the - // extracted function. + // Ignore assumptions if not been removed yet. if (isa(II)) continue; @@ -808,7 +801,6 @@ void CodeExtractor::splitReturnBlocks() { if (ReturnInst *RI = dyn_cast(Block->getTerminator())) { BasicBlock *New = Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); - if (DT) { // Old dominates New. New node dominates all other nodes dominated // by Old. 
@@ -821,14 +813,6 @@ void CodeExtractor::splitReturnBlocks() { for (DomTreeNode *I : Children) DT->changeImmediateDominator(I, NewNode); } - - if (BFI) { - BFI->setBlockFreq(New, BFI->getBlockFreq(Block).getFrequency()); - } - if (BPI) { - // BPI->getEdgeProbability() - // BPI->setEdgeProbability(); - } } } @@ -846,24 +830,17 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, // This function returns unsigned, outputs will go back by reference. switch (NumExitBlocks) { case 0: - case 1: - RetTy = Type::getVoidTy(header->getContext()); - break; - case 2: - RetTy = Type::getInt1Ty(header->getContext()); - break; - default: - RetTy = Type::getInt16Ty(header->getContext()); - break; + case 1: RetTy = Type::getVoidTy(header->getContext()); break; + case 2: RetTy = Type::getInt1Ty(header->getContext()); break; + default: RetTy = Type::getInt16Ty(header->getContext()); break; } std::vector paramTy; - SmallVector VMapArg; + // Add the types of the input values to the function's argument list for (Value *value : inputs) { LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); paramTy.push_back(value->getType()); - VMapArg.push_back(value); } // Add the types of the output values to the function's argument list. @@ -888,8 +865,9 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, paramTy.clear(); paramTy.push_back(PointerType::getUnqual(StructTy)); } - FunctionType *funcType = FunctionType::get( - RetTy, paramTy, AllowVarArgs && oldFunction->isVarArg()); + FunctionType *funcType = + FunctionType::get(RetTy, paramTy, + AllowVarArgs && oldFunction->isVarArg()); std::string SuffixToUse = Suffix.empty() @@ -899,7 +877,6 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, Function *newFunction = Function::Create( funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), oldFunction->getName() + "." 
+ SuffixToUse, M); - // If the old function is no-throw, so is the new one. if (oldFunction->doesNotThrow()) newFunction->setDoesNotThrow(); @@ -925,9 +902,9 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, continue; } else switch (Attr.getKindAsEnum()) { - // Those attributes cannot be propagated safely. Explicitly list them - // here so we get a warning if new attributes are added. This list also - // includes non-function attributes. + // Those attributes cannot be propagated safely. Explicitly list them + // here so we get a warning if new attributes are added. This list also + // includes non-function attributes. case Attribute::Alignment: case Attribute::AllocSize: case Attribute::ArgMemOnly: @@ -974,8 +951,7 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, case Attribute::EmptyKey: case Attribute::TombstoneKey: continue; - // Those attributes should be safe to propagate to the extracted - // function. + // Those attributes should be safe to propagate to the extracted function. case Attribute::AlwaysInline: case Attribute::Cold: case Attribute::DisableSanitizerInstrumentation: @@ -1020,12 +996,12 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, } // Set swifterror parameter attributes. - if (!AggregateArgs) { - for (auto &&P : enumerate(inputs)) { + if (!AggregateArgs) + for (auto P : enumerate(inputs)) { if (P.value()->isSwiftError()) newFunction->addParamAttr(P.index(), Attribute::SwiftError); } - } + // Set names for input and output arguments. if (!AggregateArgs) { @@ -1039,8 +1015,6 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, return newFunction; } - - /// Erase lifetime.start markers which reference inputs to the extraction /// region, and insert the referenced memory into \p LifetimesStart. 
/// @@ -1317,52 +1291,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { return extractCodeRegion(CEAC, Inputs, Outputs); } -void CodeExtractor::canonicalizeCFGForExtraction(BasicBlock *&Header, - bool NoExitBlockPHIs) { - // BasicBlock *header = *Blocks.begin(); - // Function *oldFunction = header->getParent(); - - // If we have any return instructions in the region, split those blocks so - // that the return is not in the region. - splitReturnBlocks(); - - // canonicalization - // If we have to split PHI nodes of the entry or exit blocks, do so now. - severSplitPHINodesOfEntry(Header); - - recomputeExitBlocks(); - - severSplitPHINodesOfExits(); - - - if (NoExitBlockPHIs) { - // TODO: preserve BPI/BFI - for (BasicBlock *Block : Blocks) { - SmallVector Succs; - llvm::append_range(Succs, successors(Block)); - - for (BasicBlock *Succ : Succs) { - if (Blocks.count(Succ)) - continue; - - if (!Succ->getSinglePredecessor()) { - Succ = SplitEdge(Block, Succ, DT); - } - - // Ensure no PHI node in exit block (still possible with single - // predecessor, e.g. LCSSA) - while (auto P = dyn_cast(&Succ->front())) { - assert(P->getNumIncomingValues() == 1); - P->replaceAllUsesWith(P->getIncomingValue(0)); - P->eraseFromParent(); - } - } - } - - recomputeExitBlocks(); - } -} - static void applyFirstDebugLoc(Function *oldFunction, ArrayRef Blocks, Instruction *BranchI) { @@ -1394,6 +1322,46 @@ void CodeExtractor::recomputeExitBlocks() { NumExitBlocks = ExitBlocks.size(); } + +void CodeExtractor::canonicalizeCFGForExtraction(BasicBlock *&Header, bool NoExitBlockPHIs) { + // If we have any return instructions in the region, split those blocks so + // that the return is not in the region. + splitReturnBlocks(); + + // If we have to split PHI nodes of the entry or exit blocks, do so now. 
+ severSplitPHINodesOfEntry(Header); + + // If a PHI in an exit block has multiple incoming values from the outlined region, create a new PHI for those values within the region such that only PHI itself becomes an output value, not each of its incoming values individually. + recomputeExitBlocks(); + severSplitPHINodesOfExits(); + + // If the option was given, ensure there are no PHI nodes at all in the exit nodes themselves. + if (NoExitBlockPHIs) { + for (BasicBlock *Block : Blocks) { + for (BasicBlock *Succ : make_early_inc_range( successors(Block))) { + if (Blocks.count(Succ)) + continue; + + if (!Succ->getSinglePredecessor()) + Succ = SplitEdge(Block, Succ, DT); + + + // Ensure no PHI node in exit block (still possible with single + // predecessor, e.g. LCSSA) + while (auto *P = dyn_cast(&Succ->front())) { + assert(P->getNumIncomingValues() == 1); + P->replaceAllUsesWith(P->getIncomingValue(0)); + P->eraseFromParent(); + } + } + } + + // Exit nodes may have been changed by SplitEdge. +// TODO: Preserve BPI/BFI for ExitBlocks (so should splitReturnBlocks()) + recomputeExitBlocks(); + } +} + Function * CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ValueSet &inputs, ValueSet &outputs) { From dec8cf80f3e7ca71caff1541144c5ef73f4777e6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 05:51:07 -0600 Subject: [PATCH 073/130] comment --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index cda42f01f65ef..ac30713b377f2 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1544,8 +1544,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, //// Copy/Move code /////////////////////////////////////////////////////////////////////////////// - // Determine position for the replacement code - auto ReplIP
= header; + // Determine position for the replacement code. Do so before header is moved to the new function. + BasicBlock* ReplIP = header; if (!KeepOldBlocks) { while (ReplIP && Blocks.count(ReplIP)) { ReplIP = ReplIP->getNextNode(); @@ -1553,7 +1553,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } if (KeepOldBlocks) { - for (BasicBlock *Block : Blocks) { BasicBlock *CBB = CloneBasicBlock(Block, VMap, {}, newFunction /*, nullptr, &DIFinder*/); From db9c1e80c530d89a9c298fbb851b0c75202e916f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 15:25:54 -0600 Subject: [PATCH 074/130] preparing for refactor extract function --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index ac30713b377f2..1a733d7fba2c9 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1426,6 +1426,15 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Find inputs to, outputs from the code region. findInputsOutputs(inputs, outputs, SinkingCands); + + // Collect objects which are inputs to the extraction region and also + // referenced by lifetime start markers within it. The effects of these + // markers must be replicated in the calling function to prevent the stack + // coloring pass from merging slots which store input objects. + ValueSet LifetimesStart; + eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); + + // Construct new function based on inputs/outputs & add allocas for all defs. 
Function *newFunction = constructFunctionDeclaration(inputs, outputs, header); @@ -1503,12 +1512,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Orlder.push_back(OldTarget); } - // Collect objects which are inputs to the extraction region and also - // referenced by lifetime start markers within it. The effects of these - // markers must be replicated in the calling function to prevent the stack - // coloring pass from merging slots which store input objects. - ValueSet LifetimesStart; - eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); + StructType *StructTy = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) @@ -1554,6 +1558,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { for (BasicBlock *Block : Blocks) { + // TODO: Don't copy assumptions BasicBlock *CBB = CloneBasicBlock(Block, VMap, {}, newFunction /*, nullptr, &DIFinder*/); From 7c15b69bff21df8319ac1896400910a0b94cb4e4 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 15:55:42 -0600 Subject: [PATCH 075/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 1a733d7fba2c9..0bc18e11d698c 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1498,7 +1498,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, recomputeExitBlocks(); } - std::map ExitBlockMap; + SmallDenseMap ExitBlockSwitchIdx; SmallVector Orlder; @@ -1613,6 +1613,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } + + std::map ExitBlockMap; for (auto OldTarget : OldTargets) { BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; if (NewTarget) From 5c45602002ca6b37885bf1c42bc2761644faf428 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 
Dec 2021 16:05:07 -0600 Subject: [PATCH 076/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 0bc18e11d698c..d9aefb476e227 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1378,6 +1378,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, canonicalizeCFGForExtraction(header, KeepOldBlocks); + + // Calculate the entry frequency of the new function before we change the root // block. BlockFrequency EntryFreq; @@ -1435,6 +1437,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); + + // Construct new function based on inputs/outputs & add allocas for all defs. Function *newFunction = constructFunctionDeclaration(inputs, outputs, header); @@ -1499,9 +1503,16 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } + // Determine position for the replacement code. Do so before header is moved to the new function. + BasicBlock* ReplIP = header; + if (!KeepOldBlocks) { + while (ReplIP && Blocks.count(ReplIP)) { + ReplIP = ReplIP->getNextNode(); + } + } + SmallDenseMap ExitBlockSwitchIdx; SmallVector Orlder; - for (BasicBlock *OldTarget : OldTargets) { if (Blocks.count(OldTarget)) continue; @@ -1548,13 +1559,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, //// Copy/Move code /////////////////////////////////////////////////////////////////////////////// - // Determine position for the replacement code. Do so before header is moved to the new function. 
- BasicBlock* ReplIP = header; - if (!KeepOldBlocks) { - while (ReplIP && Blocks.count(ReplIP)) { - ReplIP = ReplIP->getNextNode(); - } - } + if (KeepOldBlocks) { for (BasicBlock *Block : Blocks) { From 6ad705f252c455e0a6c2d2e56350affe3e6da70e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 16:07:16 -0600 Subject: [PATCH 077/130] WIP --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 83 +++++++++++---------- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index d9aefb476e227..608cbb1cbdf68 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1380,29 +1380,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - // Calculate the entry frequency of the new function before we change the root - // block. - BlockFrequency EntryFreq; - DenseMap ExitWeights; - if (BFI) { - assert(BPI && "Both BPI and BFI are required to preserve profile info"); - for (BasicBlock *Pred : predecessors(header)) { - if (Blocks.count(Pred)) - continue; - EntryFreq += - BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); - } - for (BasicBlock *Succ : ExitBlocks) { - for (BasicBlock *Block : predecessors(Succ)) { - if (!Blocks.count(Block)) - continue; - - BlockFrequency &BF = ExitWeights[Succ]; - BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); - } - } - } if (!KeepOldBlocks) { // Transforms/HotColdSplit/stale-assume-in-original-func.ll @@ -1438,6 +1416,48 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + if (!HoistingCands.empty()) { + auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + Instruction *TI = HoistToBlock->getTerminator(); + for (auto *II : HoistingCands) + cast(II)->moveBefore(TI); + recomputeExitBlocks(); + } + + + // Calculate the entry frequency of the new function before we change the root + // block. 
+ BlockFrequency EntryFreq; + DenseMap ExitWeights; + if (BFI) { + assert(BPI && "Both BPI and BFI are required to preserve profile info"); + for (BasicBlock *Pred : predecessors(header)) { + if (Blocks.count(Pred)) + continue; + EntryFreq += + BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); + } + + for (BasicBlock *Succ : ExitBlocks) { + for (BasicBlock *Block : predecessors(Succ)) { + if (!Blocks.count(Block)) + continue; + + BlockFrequency &BF = ExitWeights[Succ]; + BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); + } + } + } + + // Determine position for the replacement code. Do so before header is moved to the new function. + BasicBlock* ReplIP = header; + if (!KeepOldBlocks) { + while (ReplIP && Blocks.count(ReplIP)) { + ReplIP = ReplIP->getNextNode(); + } + } + + // Construct new function based on inputs/outputs & add allocas for all defs. Function *newFunction = constructFunctionDeclaration(inputs, outputs, header); @@ -1491,25 +1511,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } - if (!HoistingCands.empty()) { - auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); - Instruction *TI = HoistToBlock->getTerminator(); - for (auto *II : HoistingCands) { - // MoveOrCopyInst(cast(II), HoistToBlock, - // TI->getIterator()); - cast(II)->moveBefore(TI); - } - recomputeExitBlocks(); - } - - - // Determine position for the replacement code. Do so before header is moved to the new function. 
- BasicBlock* ReplIP = header; - if (!KeepOldBlocks) { - while (ReplIP && Blocks.count(ReplIP)) { - ReplIP = ReplIP->getNextNode(); - } - } SmallDenseMap ExitBlockSwitchIdx; SmallVector Orlder; From 3e2c3271e5622522ff49ef1036ba024637c04436 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 16:11:00 -0600 Subject: [PATCH 078/130] moved exit block SuccNum --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 28 ++++++++++++--------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 608cbb1cbdf68..f4ab91d8955ba 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1425,6 +1425,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } + // Calculate the entry frequency of the new function before we change the root // block. BlockFrequency EntryFreq; @@ -1457,6 +1458,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } + SmallDenseMap ExitBlockSwitchIdx; + SmallVector Orlder; + for (BasicBlock *OldTarget : OldTargets) { + if (Blocks.count(OldTarget)) + continue; + + auto Added = + ExitBlockSwitchIdx.insert({OldTarget, ExitBlockSwitchIdx.size()}); + if (Added.second) + Orlder.push_back(OldTarget); + } + + // Construct new function based on inputs/outputs & add allocas for all defs. 
@@ -1512,17 +1526,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } - SmallDenseMap ExitBlockSwitchIdx; - SmallVector Orlder; - for (BasicBlock *OldTarget : OldTargets) { - if (Blocks.count(OldTarget)) - continue; - - auto Added = - ExitBlockSwitchIdx.insert({OldTarget, ExitBlockSwitchIdx.size()}); - if (Added.second) - Orlder.push_back(OldTarget); - } @@ -1621,7 +1624,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, std::map ExitBlockMap; - for (auto OldTarget : OldTargets) { + // for (auto OldTarget : OldTargets) { + for (auto OldTarget : Orlder) { BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; if (NewTarget) continue; From d1de5672d751f52a1cf7839ec1fa19f74e2c574f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 16:14:31 -0600 Subject: [PATCH 079/130] Orlder --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index f4ab91d8955ba..44d66bba5ea0b 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1625,19 +1625,22 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, std::map ExitBlockMap; // for (auto OldTarget : OldTargets) { - for (auto OldTarget : Orlder) { + // for (auto OldTarget : Orlder) { + for (auto &&P : enumerate( Orlder)) { + auto OldTarget = P.value(); + auto SuccNum = P.index(); + BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) - continue; + //if (NewTarget) + // continue; // If we don't already have an exit stub for this non-extracted // destination, create one now! 
NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", newFunction); - VMap[OldTarget] = NewTarget; - auto SuccNum = ExitBlockSwitchIdx[OldTarget]; + // auto SuccNum = ExitBlockSwitchIdx[OldTarget]; auto &Context = Blocks.front()->getContext(); Value *brVal = nullptr; From 694143c9b477bdb06864faa878b0e8bf64f63194 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 16:23:04 -0600 Subject: [PATCH 080/130] Prepare for refactoring FuncImpl --- llvm/include/llvm/Transforms/Utils/Cloning.h | 4 +- llvm/lib/Transforms/Utils/CloneFunction.cpp | 4 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 46 ++++++++++++-------- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 5a1f322b20544..edf8a1cb932db 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -117,7 +117,9 @@ struct ClonedCodeInfo { BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix = "", Function *F = nullptr, ClonedCodeInfo *CodeInfo = nullptr, - DebugInfoFinder *DIFinder = nullptr); + DebugInfoFinder *DIFinder = nullptr, + function_ref InstSelect = {} + ); /// Return a copy of the specified function and add it to that /// function's module. 
Also, any references specified in the VMap are changed diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 048e691e33cf1..7842a6403d12e 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -43,7 +43,7 @@ using namespace llvm; BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, ClonedCodeInfo *CodeInfo, - DebugInfoFinder *DIFinder) { + DebugInfoFinder *DIFinder, function_refInstSelect ) { BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) NewBB->setName(BB->getName() + NameSuffix); @@ -53,6 +53,8 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, // Loop over all instructions, and copy them over. for (const Instruction &I : *BB) { + if (InstSelect && !InstSelect(&I)) continue; + if (DIFinder && TheModule) DIFinder->processInstruction(*TheModule, I); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 44d66bba5ea0b..320dd40cc5db4 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1425,6 +1425,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } + // CFG/ExitBlocks fixed after here + // Calculate the entry frequency of the new function before we change the root // block. @@ -1476,6 +1478,18 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Construct new function based on inputs/outputs & add allocas for all defs. 
Function *newFunction = constructFunctionDeclaration(inputs, outputs, header); + + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); + + StructType *StructArgTy = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) + StructArgTy = cast(newFunction->getArg(0)->getType()); + + + //// CodeGen newFunction implementation ////////////////////////////////////////////////////// @@ -1529,9 +1543,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - StructType *StructTy = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) - StructTy = cast(newFunction->getArg(0)->getType()); + // Create an iterator to name all of the arguments we inserted. Function::arg_iterator AI = newFunction->arg_begin(); @@ -1547,8 +1559,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); Instruction *TI = newFunction->begin()->getTerminator(); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, + StructArgTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructArgTy->getElementType(i), GEP, "loadgep_" + inputs[i]->getName(), TI); } else RewriteVal = &*AI++; @@ -1567,9 +1579,12 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (KeepOldBlocks) { for (BasicBlock *Block : Blocks) { - // TODO: Don't copy assumptions BasicBlock *CBB = CloneBasicBlock(Block, VMap, {}, - newFunction /*, nullptr, &DIFinder*/); + newFunction ,/* CodeInfo */ nullptr, /* DIFinder */ nullptr, + [](const Instruction*I)->bool { + return !isa(I); + } + ); // Add basic block mapping. 
VMap[Block] = CBB; @@ -1636,8 +1651,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // If we don't already have an exit stub for this non-extracted // destination, create one now! - NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", - newFunction); + NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", newFunction); VMap[OldTarget] = NewTarget; // auto SuccNum = ExitBlockSwitchIdx[OldTarget]; @@ -1715,10 +1729,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, auto *BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); // Store the arguments right after the definition of output value. // This should be proceeded after creating exit stubs to be ensure that invoke @@ -1755,7 +1765,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertBefore); + StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertBefore); new StoreInst(OutI, GEP, InsertBefore); // Since there should be only one struct argument aggregating // all the output values, we shouldn't increment OAI, which always @@ -1788,14 +1798,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, std::vector params; AllocaInst *Struct = nullptr; - if (AggregateArgs && StructTy) { + if (AggregateArgs && StructArgTy) { std::vector StructValues; for (Value *input : inputs) { StructValues.push_back(input); ++ArgNo; } - Struct = new AllocaInst(StructTy, DL.getAllocaAddrSpace(), nullptr, + 
Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg", &AllocaBlock->front()); params.push_back(Struct); @@ -1805,7 +1815,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); codeReplacer->getInstList().push_back(GEP); new StoreInst(StructValues[i], GEP, codeReplacer); } @@ -1858,7 +1868,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; } else { From 214fd1807a4c1c627fcd00c800eccc482f1be0db Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 16:38:43 -0600 Subject: [PATCH 081/130] extracted FuncImpl --- .../llvm/Transforms/Utils/CodeExtractor.h | 9 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 610 +++++++++--------- 2 files changed, 322 insertions(+), 297 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 8740a6dee7ea5..32c16ff20f970 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -254,7 +254,14 @@ class CodeExtractorAnalysisCache { const ValueSet &outputs, BasicBlock *header); - + void constructFunctionImplementation( + Function *newFunction, + const ValueSet &inputs, const ValueSet &outputs, + BasicBlock *header, + const ValueSet &SinkingCands + 
,StructType *StructArgTy + ,ArrayRef Orlder + ) ; void moveCodeToFunction(Function *newFunction); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 320dd40cc5db4..aa796b7b6c20c 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1015,6 +1015,318 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, return newFunction; } +static void applyFirstDebugLoc(Function *oldFunction, + ArrayRef Blocks, + Instruction *BranchI) { + if (oldFunction->getSubprogram()) { + any_of(Blocks, [&BranchI](const BasicBlock *BB) { + return any_of(*BB, [&BranchI](const Instruction &I) { + if (!I.getDebugLoc()) + return false; + BranchI->setDebugLoc(I.getDebugLoc()); + return true; + }); + }); + } +} + + +void CodeExtractor:: constructFunctionImplementation( + Function *newFunction, + const ValueSet &inputs, const ValueSet &outputs, + BasicBlock *header + , const ValueSet &SinkingCands + ,StructType *StructArgTy + ,ArrayRef Orlder +) { + Function*oldFunction = header->getParent(); + LLVMContext & Context = oldFunction->getContext(); + + // The new function needs a root node because other nodes can branch to the + // head of the region, but the entry node of a function cannot have preds. + BasicBlock *newFuncRoot = + BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); + + ValueToValueMapTy VMap; + + SmallVector AdditionalRemap; + auto MoveOrCopyInst = + [this](Instruction *I, BasicBlock *IB, + BasicBlock::iterator IP) -> Instruction * { + if (KeepOldBlocks) { + auto AI = I->clone(); + AI->setName(I->getName()); + IB->getInstList().insert(IP, AI); + return AI; + } + I->moveBefore(*IB, IP); + return I; + }; + + // Now sink all instructions which only have non-phi uses inside the region. + // Group the allocas at the start of the block, so that any bitcast uses of + // the allocas are well-defined. 
+ + for (auto *II : SinkingCands) { + if (!isa(II)) { + auto New = MoveOrCopyInst(cast(II), newFuncRoot, + newFuncRoot->getFirstInsertionPt()); + if (KeepOldBlocks) { + AdditionalRemap.push_back(New); + VMap[II] = New; + } + } + } + for (auto *II : SinkingCands) { + if (auto *AI = dyn_cast(II)) { + AI = cast( + MoveOrCopyInst(AI, newFuncRoot, newFuncRoot->getFirstInsertionPt())); + if (KeepOldBlocks) { + AdditionalRemap.push_back(AI); + VMap[II] = AI; + } + } + } + + + + + + + + // Create an iterator to name all of the arguments we inserted. + Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. + SmallVector NewValues; + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); + Instruction *TI = newFunction->begin()->getTerminator(); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructArgTy->getElementType(i), GEP, + "loadgep_" + inputs[i]->getName(), TI); + } else + RewriteVal = &*AI++; + + NewValues.push_back(RewriteVal); + } + + for (auto &&P : enumerate(inputs)) { + VMap[P.value()] = NewValues[P.index()]; + } + + //// Copy/Move code + /////////////////////////////////////////////////////////////////////////////// + + + + if (KeepOldBlocks) { + for (BasicBlock *Block : Blocks) { + BasicBlock *CBB = CloneBasicBlock(Block, VMap, {}, + newFunction ,/* CodeInfo */ nullptr, /* DIFinder */ nullptr, + [](const Instruction*I)->bool { + return !isa(I); + } + ); + + // Add basic block mapping. 
+ VMap[Block] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (Block->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(oldFunction, Block); + VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); + } + + // Note return instructions for the caller. + // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) + // Returns.push_back(RI); + + for (auto &&P : CBB->phis()) { + auto NumIncoming = P.getNumIncomingValues(); + for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { + if (Blocks.count(P.getIncomingBlock(Idx))) + continue; + P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/false); + } + } + } + + for (auto Pred : predecessors(header)) { + if (VMap.count(Pred)) + continue; + VMap[Pred] = newFuncRoot; + } + + } else { + moveCodeToFunction(newFunction); + + if (!KeepOldBlocks) { + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal = NewValues[i]; + + std::vector Users(inputs[i]->user_begin(), + inputs[i]->user_end()); + for (User *use : Users) + if (Instruction *inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } + } + } + + + std::map ExitBlockMap; + // for (auto OldTarget : OldTargets) { + // for (auto OldTarget : Orlder) { + for (auto &&P : enumerate( Orlder)) { + auto OldTarget = P.value(); + auto SuccNum = P.index(); + + BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; + //if (NewTarget) + // continue; + + // If we don't already have an exit stub for this non-extracted + // destination, create one now! 
+ NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", newFunction); + VMap[OldTarget] = NewTarget; + + // auto SuccNum = ExitBlockSwitchIdx[OldTarget]; + + auto &Context = Blocks.front()->getContext(); + Value *brVal = nullptr; + assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); + switch (NumExitBlocks) { + case 0: + case 1: + break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } + + ReturnInst::Create(Context, brVal, NewTarget); + } + + for (BasicBlock *Block : Blocks) { + Instruction *TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; + BasicBlock *OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock *NewTarget = ExitBlockMap[OldTarget]; + assert(NewTarget && "Unknown target block!"); + + if (!KeepOldBlocks) { + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } else { + VMap[OldTarget] = NewTarget; + } + } + } + + if (KeepOldBlocks) { + for (Instruction *II : AdditionalRemap) + RemapInstruction(II, VMap, RF_NoModuleLevelChanges); + + // Loop over all of the instructions in the new function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (BasicBlock *Block : Blocks) { + WeakTrackingVH NewBlock = VMap.lookup(Block); + if (!NewBlock) { + continue; + } + BasicBlock &Y = cast(*NewBlock); + + // Loop over all instructions, fixing each one as we find it... + + for (Instruction &II : Y) + RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); + } + } else { + // Loop over all of the PHI nodes in the header and exit blocks, and change + // any references to the old incoming edge to be the new incoming edge. 
+ for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!Blocks.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); + } + } + + auto NewHeader = header; + if (KeepOldBlocks) + NewHeader = cast(VMap.lookup(NewHeader)); + assert(NewHeader); + auto *BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); + + + // Store the arguments right after the definition of output value. + // This should be proceeded after creating exit stubs to be ensure that invoke + // result restore will be placed in the outlined function. + Function::arg_iterator OAI = newFunction->arg_begin() + inputs.size(); + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto *OutI = dyn_cast(outputs[i]); + if (!OutI) + continue; + + if (KeepOldBlocks) + OutI = cast(VMap.lookup(OutI)); + + // Find proper insertion point. + BasicBlock::iterator InsertPt; + // In case OutI is an invoke, we insert the store at the beginning in the + // 'normal destination' BB. Otherwise we insert the store right after OutI. 
+ if (auto *InvokeI = dyn_cast(OutI)) + InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); + else if (auto *Phi = dyn_cast(OutI)) + InsertPt = Phi->getParent()->getFirstInsertionPt(); + else + InsertPt = std::next(OutI->getIterator()); + + Instruction *InsertBefore = &*InsertPt; + assert((InsertBefore->getFunction() == newFunction || + Blocks.count(InsertBefore->getParent())) && + "InsertPt should be in new function"); + assert(OAI != newFunction->arg_end() && + "Number of output arguments should match " + "the amount of defined values"); + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertBefore); + new StoreInst(OutI, GEP, InsertBefore); + // Since there should be only one struct argument aggregating + // all the output values, we shouldn't increment OAI, which always + // points to the struct argument, in this case. + } else { + new StoreInst(OutI, &*OAI, InsertBefore); + ++OAI; + } + } +} + + /// Erase lifetime.start markers which reference inputs to the extraction /// region, and insert the referenced memory into \p LifetimesStart. 
/// @@ -1291,20 +1603,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { return extractCodeRegion(CEAC, Inputs, Outputs); } -static void applyFirstDebugLoc(Function *oldFunction, - ArrayRef Blocks, - Instruction *BranchI) { - if (oldFunction->getSubprogram()) { - any_of(Blocks, [&BranchI](const BasicBlock *BB) { - return any_of(*BB, [&BranchI](const Instruction &I) { - if (!I.getDebugLoc()) - return false; - BranchI->setDebugLoc(I.getDebugLoc()); - return true; - }); - }); - } -} + void CodeExtractor::recomputeExitBlocks() { OldTargets.clear(); @@ -1493,288 +1792,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, //// CodeGen newFunction implementation ////////////////////////////////////////////////////// - // The new function needs a root node because other nodes can branch to the - // head of the region, but the entry node of a function cannot have preds. - BasicBlock *newFuncRoot = - BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); - - ValueToValueMapTy VMap; - - SmallVector AdditionalRemap; - auto MoveOrCopyInst = - [this](Instruction *I, BasicBlock *IB, - BasicBlock::iterator IP) -> Instruction * { - if (KeepOldBlocks) { - auto AI = I->clone(); - AI->setName(I->getName()); - IB->getInstList().insert(IP, AI); - return AI; - } - I->moveBefore(*IB, IP); - return I; - }; - - // Now sink all instructions which only have non-phi uses inside the region. - // Group the allocas at the start of the block, so that any bitcast uses of - // the allocas are well-defined. 
- - for (auto *II : SinkingCands) { - if (!isa(II)) { - auto New = MoveOrCopyInst(cast(II), newFuncRoot, - newFuncRoot->getFirstInsertionPt()); - if (KeepOldBlocks) { - AdditionalRemap.push_back(New); - VMap[II] = New; - } - } - } - for (auto *II : SinkingCands) { - if (auto *AI = dyn_cast(II)) { - AI = cast( - MoveOrCopyInst(AI, newFuncRoot, newFuncRoot->getFirstInsertionPt())); - if (KeepOldBlocks) { - AdditionalRemap.push_back(AI); - VMap[II] = AI; - } - } - } - - - - - - - - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); - - // Rewrite all users of the inputs in the extracted region to use the - // arguments (or appropriate addressing into struct) instead. - SmallVector NewValues; - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); - Instruction *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructArgTy->getElementType(i), GEP, - "loadgep_" + inputs[i]->getName(), TI); - } else - RewriteVal = &*AI++; - - NewValues.push_back(RewriteVal); - } - - for (auto &&P : enumerate(inputs)) { - VMap[P.value()] = NewValues[P.index()]; - } - - //// Copy/Move code - /////////////////////////////////////////////////////////////////////////////// - - - - if (KeepOldBlocks) { - for (BasicBlock *Block : Blocks) { - BasicBlock *CBB = CloneBasicBlock(Block, VMap, {}, - newFunction ,/* CodeInfo */ nullptr, /* DIFinder */ nullptr, - [](const Instruction*I)->bool { - return !isa(I); - } - ); - - // Add basic block mapping. 
- VMap[Block] = CBB; - - // It is only legal to clone a function if a block address within that - // function is never referenced outside of the function. Given that, we - // want to map block addresses from the old function to block addresses in - // the clone. (This is different from the generic ValueMapper - // implementation, which generates an invalid blockaddress when - // cloning a function.) - if (Block->hasAddressTaken()) { - Constant *OldBBAddr = BlockAddress::get(oldFunction, Block); - VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); - } - - // Note return instructions for the caller. - // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) - // Returns.push_back(RI); - - for (auto &&P : CBB->phis()) { - auto NumIncoming = P.getNumIncomingValues(); - for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { - if (Blocks.count(P.getIncomingBlock(Idx))) - continue; - P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/false); - } - } - } - - for (auto Pred : predecessors(header)) { - if (VMap.count(Pred)) - continue; - VMap[Pred] = newFuncRoot; - } - - } else { - moveCodeToFunction(newFunction); - - if (!KeepOldBlocks) { - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal = NewValues[i]; - - std::vector Users(inputs[i]->user_begin(), - inputs[i]->user_end()); - for (User *use : Users) - if (Instruction *inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } - } - } - - - std::map ExitBlockMap; - // for (auto OldTarget : OldTargets) { - // for (auto OldTarget : Orlder) { - for (auto &&P : enumerate( Orlder)) { - auto OldTarget = P.value(); - auto SuccNum = P.index(); - - BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - //if (NewTarget) - // continue; - - // If we don't already have an exit stub for this non-extracted - // destination, create one now! 
- NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", newFunction); - VMap[OldTarget] = NewTarget; - - // auto SuccNum = ExitBlockSwitchIdx[OldTarget]; - - auto &Context = Blocks.front()->getContext(); - Value *brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: - break; // No value needed. - case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); - break; - } - - ReturnInst::Create(Context, brVal, NewTarget); - } - - for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock *OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock *NewTarget = ExitBlockMap[OldTarget]; - assert(NewTarget && "Unknown target block!"); - - if (!KeepOldBlocks) { - // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); - } else { - VMap[OldTarget] = NewTarget; - } - } - } - - if (KeepOldBlocks) { - for (Instruction *II : AdditionalRemap) - RemapInstruction(II, VMap, RF_NoModuleLevelChanges); - - // Loop over all of the instructions in the new function, fixing up operand - // references as we go. This uses VMap to do all the hard work. - for (BasicBlock *Block : Blocks) { - WeakTrackingVH NewBlock = VMap.lookup(Block); - if (!NewBlock) { - continue; - } - BasicBlock &Y = cast(*NewBlock); - - // Loop over all instructions, fixing each one as we find it... - - for (Instruction &II : Y) - RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); - } - } else { - // Loop over all of the PHI nodes in the header and exit blocks, and change - // any references to the old incoming edge to be the new incoming edge. 
- for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { - PHINode *PN = cast(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!Blocks.count(PN->getIncomingBlock(i))) - PN->setIncomingBlock(i, newFuncRoot); - } - } - - auto NewHeader = header; - if (KeepOldBlocks) - NewHeader = cast(VMap.lookup(NewHeader)); - assert(NewHeader); - auto *BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); - - - // Store the arguments right after the definition of output value. - // This should be proceeded after creating exit stubs to be ensure that invoke - // result restore will be placed in the outlined function. - Function::arg_iterator OAI = OutputArgBegin; - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto *OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; - - if (KeepOldBlocks) - OutI = cast(VMap.lookup(OutI)); - - // Find proper insertion point. - BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. Otherwise we insert the store right after OutI. 
- if (auto *InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); - - Instruction *InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || - Blocks.count(InsertBefore->getParent())) && - "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertBefore); - new StoreInst(OutI, GEP, InsertBefore); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. 
- } else { - new StoreInst(OutI, &*OAI, InsertBefore); - ++OAI; - } - } + constructFunctionImplementation( newFunction, inputs, outputs, header, SinkingCands, StructArgTy, Orlder ); //// Codegen newFunction call replacement ///////////////////////////////////////////////// From b6dc69b88bf7fb363d1299ae91d885bfb75e6862 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 17:10:52 -0600 Subject: [PATCH 082/130] extracted emitReplacerCall --- .../llvm/Transforms/Utils/CodeExtractor.h | 15 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 398 ++++++++++-------- 2 files changed, 225 insertions(+), 188 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 32c16ff20f970..e198f0422c505 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -254,7 +254,7 @@ class CodeExtractorAnalysisCache { const ValueSet &outputs, BasicBlock *header); - void constructFunctionImplementation( + void emitFunction( Function *newFunction, const ValueSet &inputs, const ValueSet &outputs, BasicBlock *header, @@ -263,6 +263,19 @@ class CodeExtractorAnalysisCache { ,ArrayRef Orlder ) ; + CallInst * emitReplacerCall( + Function *oldFunction, + BasicBlock *header + , BasicBlock *ReplIP + , Function *newFunction + , const ValueSet &inputs, const ValueSet &outputs + , BlockFrequency EntryFreq + , StructType *StructArgTy + ,ArrayRef Orlder + , const SetVector &LifetimesStart + , std::vector &Reloads + ); + void moveCodeToFunction(Function *newFunction); void calculateNewCallTerminatorWeights( diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index aa796b7b6c20c..0a3755841a407 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1031,7 +1031,7 @@ static void applyFirstDebugLoc(Function *oldFunction, } -void CodeExtractor:: 
constructFunctionImplementation( +void CodeExtractor:: emitFunction( Function *newFunction, const ValueSet &inputs, const ValueSet &outputs, BasicBlock *header @@ -1327,6 +1327,7 @@ void CodeExtractor:: constructFunctionImplementation( } + /// Erase lifetime.start markers which reference inputs to the extraction /// region, and insert the referenced memory into \p LifetimesStart. /// @@ -1408,6 +1409,193 @@ static void insertLifetimeMarkersSurroundingCall( } } +CallInst * CodeExtractor:: emitReplacerCall( + Function *oldFunction, + BasicBlock *header // NewHeader + , BasicBlock *ReplIP + , Function *newFunction + , const ValueSet &inputs, const ValueSet &outputs + , BlockFrequency EntryFreq + , StructType *StructArgTy + ,ArrayRef Orlder + , const SetVector &LifetimesStart + , std::vector &Reloads +) { + LLVMContext & Context = oldFunction->getContext(); + Module *M = oldFunction->getParent(); + const DataLayout &DL = M->getDataLayout(); + + // This takes place of the original loop + BasicBlock *codeReplacer = + BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, ReplIP); + BasicBlock *AllocaBlock = &oldFunction->front(); + + // Update the entry count of the function. 
+ if (BFI) { + auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (Count.hasValue()) + newFunction->setEntryCount( + ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME + BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); + } + + // Add inputs as params, or to be filled into the struct + unsigned ArgNo = 0; + std::vector params; + + AllocaInst *Struct = nullptr; + if (AggregateArgs && StructArgTy) { + std::vector StructValues; + for (Value *input : inputs) { + StructValues.push_back(input); + ++ArgNo; + } + + Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", &AllocaBlock->front()); + + params.push_back(Struct); + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + new StoreInst(StructValues[i], GEP, codeReplacer); + } + } + + std::vector ReloadOutputs; + + if (!AggregateArgs) { + for (Value *input : inputs) { + params.push_back(input); + } + + // Create allocas for the outputs + for (Value *output : outputs) { + AllocaInst *alloca = + new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), nullptr, + output->getName() + ".loc", &AllocaBlock->front()); + ReloadOutputs.push_back(alloca); + params.push_back(alloca); + } + } + + // Emit the call to the function + CallInst *call = + CallInst::Create(newFunction, params, + NumExitBlocks > 1 ? "targetBlock" : "", codeReplacer); + + // Set swifterror parameter attributes. + if (!AggregateArgs) { + for (auto &&P : enumerate(inputs)) { + if (P.value()->isSwiftError()) + call->addParamAttr(P.index(), Attribute::SwiftError); + } + } + + // Add debug location to the new call, if the original function has debug + // info. 
In that case, the terminator of the entry block of the extracted + // function contains the first debug location of the extracted function, + // set in extractCodeRegion. + if (oldFunction->getSubprogram()) { + if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) + call->setDebugLoc(DL); + } + + // Reload the outputs passed in by reference. + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *Output = nullptr; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } else { + Output = ReloadOutputs[i]; + } + LoadInst *load = + new LoadInst(outputs[i]->getType(), Output, + outputs[i]->getName() + ".reload", codeReplacer); + Reloads.push_back(load); + } + + // Now we can emit a switch statement using the call as a value. + SwitchInst *TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + + for (auto &&P : enumerate( Orlder)) { + auto OldTarget = P.value(); + auto SuccNum = P.index(); // ExitBlockSwitchIdx[OldTarget]; + + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), SuccNum), + OldTarget); + } + + // Now that we've done the deed, simplify the switch instruction. 
+ Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy->isVoidTy()) { + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + } else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. + ReturnInst::Create(Context, Constant::getNullValue(OldFnRetTy), + TheSwitch); + } + + TheSwitch->eraseFromParent(); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->eraseFromParent(); + break; + case 2: + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->eraseFromParent(); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. + TheSwitch->setCondition(call); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks - 1)); + break; + } + + // Insert lifetime markers around the reloads of any output values. The + // allocas output values are stored in are only in-use in the codeRepl block. + insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); + + // Replicate the effects of any lifetime start/end markers which referenced + // input objects in the extraction region by placing markers around the call. 
+ insertLifetimeMarkersSurroundingCall(M, LifetimesStart.getArrayRef(), {}, call); + + return call; +} + + + void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function *oldFunc = (*Blocks.begin())->getParent(); Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); @@ -1792,177 +1980,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, //// CodeGen newFunction implementation ////////////////////////////////////////////////////// - constructFunctionImplementation( newFunction, inputs, outputs, header, SinkingCands, StructArgTy, Orlder ); + emitFunction( newFunction, inputs, outputs, header, SinkingCands, StructArgTy, Orlder ); //// Codegen newFunction call replacement ///////////////////////////////////////////////// + std::vectorReloads; + CallInst * call = emitReplacerCall(oldFunction, header, ReplIP, newFunction, inputs, outputs, EntryFreq, StructArgTy, Orlder, LifetimesStart,Reloads); + BasicBlock *codeReplacer = call->getParent(); - // This takes place of the original loop - BasicBlock *codeReplacer = - BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, ReplIP); - BasicBlock *AllocaBlock = &oldFunction->front(); - - // Update the entry count of the function. 
- if (BFI) { - auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (Count.hasValue()) - newFunction->setEntryCount( - ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME - BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); - } - - // Add inputs as params, or to be filled into the struct - unsigned ArgNo = 0; - std::vector params; - - AllocaInst *Struct = nullptr; - if (AggregateArgs && StructArgTy) { - std::vector StructValues; - for (Value *input : inputs) { - StructValues.push_back(input); - ++ArgNo; - } - - Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", &AllocaBlock->front()); - - params.push_back(Struct); - - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); - } - } - - std::vector ReloadOutputs; - std::vector Reloads; - if (!AggregateArgs) { - for (Value *input : inputs) { - params.push_back(input); - } - - // Create allocas for the outputs - for (Value *output : outputs) { - AllocaInst *alloca = - new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), nullptr, - output->getName() + ".loc", &AllocaBlock->front()); - ReloadOutputs.push_back(alloca); - params.push_back(alloca); - } - } - - // Emit the call to the function - CallInst *call = - CallInst::Create(newFunction, params, - NumExitBlocks > 1 ? "targetBlock" : "", codeReplacer); - - // Set swifterror parameter attributes. - if (!AggregateArgs) { - for (auto &&P : enumerate(inputs)) { - if (P.value()->isSwiftError()) - call->addParamAttr(P.index(), Attribute::SwiftError); - } - } - - // Add debug location to the new call, if the original function has debug - // info. 
In that case, the terminator of the entry block of the extracted - // function contains the first debug location of the extracted function, - // set in extractCodeRegion. - if (oldFunction->getSubprogram()) { - if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) - call->setDebugLoc(DL); - } - - // Reload the outputs passed in by reference. - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = nullptr; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } else { - Output = ReloadOutputs[i]; - } - LoadInst *load = - new LoadInst(outputs[i]->getType(), Output, - outputs[i]->getName() + ".reload", codeReplacer); - Reloads.push_back(load); - } - - // Now we can emit a switch statement using the call as a value. - SwitchInst *TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); - - for (auto &&P : Orlder) { - auto OldTarget = P; - auto SuccNum = ExitBlockSwitchIdx[OldTarget]; - - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), SuccNum), - OldTarget); - } - - // Now that we've done the deed, simplify the switch instruction. 
- Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); - switch (NumExitBlocks) { - case 0: - // There are no successors (the block containing the switch itself), which - // means that previously this was the last part of the function, and hence - // this should be rewritten as a `ret' - - // Check if the function should return a value - if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, nullptr, TheSwitch); // Return void - } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { - // return what we have - ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); - } else { - // Otherwise we must have code extracted an unwind or something, just - // return whatever we want. - ReturnInst::Create(Context, Constant::getNullValue(OldFnRetTy), - TheSwitch); - } - - TheSwitch->eraseFromParent(); - break; - case 1: - // Only a single destination, change the switch into an unconditional - // branch. - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); - TheSwitch->eraseFromParent(); - break; - case 2: - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), - call, TheSwitch); - TheSwitch->eraseFromParent(); - break; - default: - // Otherwise, make the default destination of the switch instruction be one - // of the other successors. - TheSwitch->setCondition(call); - TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); - // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks - 1)); - break; - } - - // Insert lifetime markers around the reloads of any output values. The - // allocas output values are stored in are only in-use in the codeRepl block. - insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); - - // Replicate the effects of any lifetime start/end markers which referenced - // input objects in the extraction region by placing markers around the call. 
- insertLifetimeMarkersSurroundingCall(M, LifetimesStart.getArrayRef(), {}, - call); //// Connect call replacement to CFG /////////////////////////////////////////////////////////////////////////// @@ -1978,25 +2003,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (I->isTerminator() && I->getFunction() == oldFunction) I->replaceUsesOfWith(header, codeReplacer); - if (!KeepOldBlocks) { - for (BasicBlock *ExitBB : ExitBlocks) - for (PHINode &PN : ExitBB->phis()) { - Value *IncomingCodeReplacerVal = nullptr; - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - // Ignore incoming values from outside of the extracted region. - if (!Blocks.count(PN.getIncomingBlock(i))) - continue; - - // Ensure that there is only one incoming value from codeReplacer. - if (!IncomingCodeReplacerVal) { - PN.setIncomingBlock(i, codeReplacer); - IncomingCodeReplacerVal = PN.getIncomingValue(i); - } else - assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && - "PHI has two incompatbile incoming values from codeRepl"); - } - } - } if (KeepOldBlocks) { // Must be done after remap @@ -2030,6 +2036,24 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } } } else { + for (BasicBlock *ExitBB : ExitBlocks) + for (PHINode &PN : ExitBB->phis()) { + Value *IncomingCodeReplacerVal = nullptr; + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // Ignore incoming values from outside of the extracted region. + if (!Blocks.count(PN.getIncomingBlock(i))) + continue; + + // Ensure that there is only one incoming value from codeReplacer. 
+ if (!IncomingCodeReplacerVal) { + PN.setIncomingBlock(i, codeReplacer); + IncomingCodeReplacerVal = PN.getIncomingValue(i); + } else + assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && + "PHI has two incompatbile incoming values from codeRepl"); + } + } + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { auto load = Reloads[i]; From e381872d3c4fa4348fe9af42ff642392ab25a212 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 17:23:40 -0600 Subject: [PATCH 083/130] extracted insertReplacer --- .../llvm/Transforms/Utils/CodeExtractor.h | 11 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 172 ++++++++++-------- 2 files changed, 103 insertions(+), 80 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index e198f0422c505..620128c993194 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -276,11 +276,20 @@ class CodeExtractorAnalysisCache { , std::vector &Reloads ); + void insertReplacerCall( + Function *oldFunction, + BasicBlock *header, + BasicBlock *codeReplacer + , const ValueSet &outputs + , ArrayRefReloads + , const DenseMap &ExitWeights + ); + void moveCodeToFunction(Function *newFunction); void calculateNewCallTerminatorWeights( BasicBlock *CodeReplacer, - DenseMap &ExitWeights, + const DenseMap &ExitWeights, BranchProbabilityInfo *BPI); }; diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 0a3755841a407..f92e4420e54d5 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1595,6 +1595,96 @@ CallInst * CodeExtractor:: emitReplacerCall( } +void CodeExtractor:: insertReplacerCall( + Function *oldFunction, + BasicBlock *header, + BasicBlock *codeReplacer + , const ValueSet &outputs + , ArrayRefReloads + , const DenseMap &ExitWeights +){ + // Rewrite branches to basic blocks 
outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector Users(header->user_begin(), header->user_end()); + for (auto &U : Users) // FIXME: KeepOldBlocks? + // The BasicBlock which contains the branch is not in + // the region modify the branch target to a new block + if (Instruction *I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction) + I->replaceUsesOfWith(header, codeReplacer); + + + if (KeepOldBlocks) { + // Must be done after remap + SSAUpdater SSA; + for (auto P : enumerate(outputs)) { + auto OutIdx = P.index(); + auto OldVal = cast(P.value()); + auto NewVal = Reloads[OutIdx]; + + SSA.Initialize(OldVal->getType(), + (OldVal->getName() + ".merge_with_extracted").str()); + SSA.AddAvailableValue(codeReplacer, NewVal); + + // Could help SSAUpdater by determining in advance which output values are + // available in which exit blocks (from DT). + SSA.AddAvailableValue(OldVal->getParent(), OldVal); + + for (auto &&U : make_early_inc_range(OldVal->uses())) { + auto User = dyn_cast(U.getUser()); + if (!User) + continue; + auto EffectiveUser = User->getParent(); + if (auto &&P = dyn_cast(User)) { + EffectiveUser = P->getIncomingBlock(U); + } + + if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) + continue; + + SSA.RewriteUseAfterInsertions(U); + } + } + } else { + for (BasicBlock *ExitBB : ExitBlocks) + for (PHINode &PN : ExitBB->phis()) { + Value *IncomingCodeReplacerVal = nullptr; + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // Ignore incoming values from outside of the extracted region. + if (!Blocks.count(PN.getIncomingBlock(i))) + continue; + + // Ensure that there is only one incoming value from codeReplacer. 
+ if (!IncomingCodeReplacerVal) { + PN.setIncomingBlock(i, codeReplacer); + IncomingCodeReplacerVal = PN.getIncomingValue(i); + } else + assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && + "PHI has two incompatbile incoming values from codeRepl"); + } + } + + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto load = Reloads[i]; + + std::vector Users(outputs[i]->user_begin(), + outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction *inst = cast(Users[u]); + if (inst->getParent()->getParent() == oldFunction) + inst->replaceUsesOfWith(outputs[i], load); + } + } + } + + // Update the branch weights for the exit block. + if (BFI && NumExitBlocks > 1) + calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); + + +} + void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function *oldFunc = (*Blocks.begin())->getParent(); @@ -1617,7 +1707,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { void CodeExtractor::calculateNewCallTerminatorWeights( BasicBlock *CodeReplacer, - DenseMap &ExitWeights, + const DenseMap &ExitWeights, BranchProbabilityInfo *BPI) { using Distribution = BlockFrequencyInfoImplBase::Distribution; using BlockNode = BlockFrequencyInfoImplBase::BlockNode; @@ -1635,7 +1725,7 @@ void CodeExtractor::calculateNewCallTerminatorWeights( // Add each of the frequencies of the successors. 
for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) { BlockNode ExitNode(i); - uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency(); + uint64_t ExitFreq = ExitWeights .lookup(TI->getSuccessor(i)).getFrequency(); if (ExitFreq != 0) BranchDist.addExit(ExitNode, ExitFreq); else @@ -1992,84 +2082,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, //// Connect call replacement to CFG /////////////////////////////////////////////////////////////////////////// - // Rewrite branches to basic blocks outside of the loop to new dummy blocks - // within the new function. This must be done before we lose track of which - // blocks were originally in the code region. - std::vector Users(header->user_begin(), header->user_end()); - for (auto &U : Users) // FIXME: KeepOldBlocks? - // The BasicBlock which contains the branch is not in - // the region modify the branch target to a new block - if (Instruction *I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction) - I->replaceUsesOfWith(header, codeReplacer); - - - if (KeepOldBlocks) { - // Must be done after remap - SSAUpdater SSA; - for (auto P : enumerate(outputs)) { - auto OutIdx = P.index(); - auto OldVal = cast(P.value()); - auto NewVal = Reloads[OutIdx]; - - SSA.Initialize(OldVal->getType(), - (OldVal->getName() + ".merge_with_extracted").str()); - SSA.AddAvailableValue(codeReplacer, NewVal); - - // Could help SSAUpdater by determining in advance which output values are - // available in which exit blocks (from DT). 
- SSA.AddAvailableValue(OldVal->getParent(), OldVal); - - for (auto &&U : make_early_inc_range(OldVal->uses())) { - auto User = dyn_cast(U.getUser()); - if (!User) - continue; - auto EffectiveUser = User->getParent(); - if (auto &&P = dyn_cast(User)) { - EffectiveUser = P->getIncomingBlock(U); - } - - if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) - continue; - - SSA.RewriteUseAfterInsertions(U); - } - } - } else { - for (BasicBlock *ExitBB : ExitBlocks) - for (PHINode &PN : ExitBB->phis()) { - Value *IncomingCodeReplacerVal = nullptr; - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - // Ignore incoming values from outside of the extracted region. - if (!Blocks.count(PN.getIncomingBlock(i))) - continue; - - // Ensure that there is only one incoming value from codeReplacer. - if (!IncomingCodeReplacerVal) { - PN.setIncomingBlock(i, codeReplacer); - IncomingCodeReplacerVal = PN.getIncomingValue(i); - } else - assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && - "PHI has two incompatbile incoming values from codeRepl"); - } - } - - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto load = Reloads[i]; - - std::vector Users(outputs[i]->user_begin(), - outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast(Users[u]); - if (inst->getParent()->getParent() == oldFunction) - inst->replaceUsesOfWith(outputs[i], load); - } - } - } + insertReplacerCall(oldFunction, header, codeReplacer, outputs, Reloads, ExitWeights); - // Update the branch weights for the exit block. 
- if (BFI && NumExitBlocks > 1) - calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *call); From 2c716189701318ddb2e09b41d4cb33d4a939106a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 17:24:20 -0600 Subject: [PATCH 084/130] clang-format --- llvm/include/llvm/Transforms/Utils/Cloning.h | 12 +- .../llvm/Transforms/Utils/CodeExtractor.h | 65 +- llvm/lib/Transforms/IPO/BlockExtractor.cpp | 9 +- llvm/lib/Transforms/Utils/CloneFunction.cpp | 12 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1213 ++++++++--------- 5 files changed, 631 insertions(+), 680 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index edf8a1cb932db..f9371c67d76ff 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -114,12 +114,12 @@ struct ClonedCodeInfo { /// If you would like to collect additional information about the cloned /// function, you can specify a ClonedCodeInfo object with the optional fifth /// parameter. -BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, - const Twine &NameSuffix = "", Function *F = nullptr, - ClonedCodeInfo *CodeInfo = nullptr, - DebugInfoFinder *DIFinder = nullptr, - function_ref InstSelect = {} - ); +BasicBlock * +CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, + const Twine &NameSuffix = "", Function *F = nullptr, + ClonedCodeInfo *CodeInfo = nullptr, + DebugInfoFinder *DIFinder = nullptr, + function_ref InstSelect = {}); /// Return a copy of the specified function and add it to that /// function's module. 
Also, any references specified in the VMap are changed diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 620128c993194..efa19712d1e84 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -115,8 +115,6 @@ class CodeExtractorAnalysisCache { // label, if non-empty, otherwise "extracted". std::string Suffix; - - public: /// Create a code extractor for a sequence of blocks. /// @@ -131,9 +129,9 @@ class CodeExtractorAnalysisCache { CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr, - AssumptionCache *AC = nullptr, - bool AllowVarArgs = false, bool AllowAlloca = false, - std::string Suffix = "", bool KeepOldBlocks= false); + AssumptionCache *AC = nullptr, bool AllowVarArgs = false, + bool AllowAlloca = false, std::string Suffix = "", + bool KeepOldBlocks = false); /// Create a code extractor for a loop body. 
/// @@ -236,17 +234,12 @@ class CodeExtractorAnalysisCache { getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, Instruction *Addr, BasicBlock *ExitBlock) const; - - - - void recomputeExitBlocks(); + void recomputeExitBlocks(); void severSplitPHINodesOfEntry(BasicBlock *&Header); void severSplitPHINodesOfExits(); void splitReturnBlocks(); - - void canonicalizeCFGForExtraction(BasicBlock *&Header, bool NoExitBlockPHIs); @@ -254,42 +247,30 @@ class CodeExtractorAnalysisCache { const ValueSet &outputs, BasicBlock *header); - void emitFunction( - Function *newFunction, - const ValueSet &inputs, const ValueSet &outputs, - BasicBlock *header, - const ValueSet &SinkingCands - ,StructType *StructArgTy - ,ArrayRef Orlder - ) ; - - CallInst * emitReplacerCall( - Function *oldFunction, - BasicBlock *header - , BasicBlock *ReplIP - , Function *newFunction - , const ValueSet &inputs, const ValueSet &outputs - , BlockFrequency EntryFreq - , StructType *StructArgTy - ,ArrayRef Orlder - , const SetVector &LifetimesStart - , std::vector &Reloads - ); - - void insertReplacerCall( - Function *oldFunction, - BasicBlock *header, - BasicBlock *codeReplacer - , const ValueSet &outputs - , ArrayRefReloads - , const DenseMap &ExitWeights - ); + void emitFunction(Function *newFunction, const ValueSet &inputs, + const ValueSet &outputs, BasicBlock *header, + const ValueSet &SinkingCands, StructType *StructArgTy, + ArrayRef Orlder); + + CallInst *emitReplacerCall(Function *oldFunction, BasicBlock *header, + BasicBlock *ReplIP, Function *newFunction, + const ValueSet &inputs, const ValueSet &outputs, + BlockFrequency EntryFreq, + StructType *StructArgTy, + ArrayRef Orlder, + const SetVector &LifetimesStart, + std::vector &Reloads); + + void insertReplacerCall( + Function *oldFunction, BasicBlock *header, BasicBlock *codeReplacer, + const ValueSet &outputs, ArrayRef Reloads, + const DenseMap &ExitWeights); void moveCodeToFunction(Function *newFunction); void 
calculateNewCallTerminatorWeights( BasicBlock *CodeReplacer, - const DenseMap &ExitWeights, + const DenseMap &ExitWeights, BranchProbabilityInfo *BPI); }; diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 79cc8e8c80e4e..408bb55235d7e 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -229,8 +229,13 @@ bool BlockExtractor::runOnModule(Module &M) { Changed = true; } CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); - Function *F = CodeExtractor(BlocksToExtractVec,/* DT */ nullptr,/* AggregateArgs*/ false, /* BFI */ nullptr,/* BPI */ nullptr, /* AC */nullptr,/* AllowVarArgs */ false,/* AllowAlloca */ false, /* Suffix */ "", KeepOldBlocks) - .extractCodeRegion(CEAC); + Function *F = + CodeExtractor(BlocksToExtractVec, /* DT */ nullptr, + /* AggregateArgs*/ false, /* BFI */ nullptr, + /* BPI */ nullptr, /* AC */ nullptr, + /* AllowVarArgs */ false, /* AllowAlloca */ false, + /* Suffix */ "", KeepOldBlocks) + .extractCodeRegion(CEAC); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() << "' in: " << F->getName() << '\n'); diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 7842a6403d12e..c08adec77445f 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -40,10 +40,11 @@ using namespace llvm; #define DEBUG_TYPE "clone-function" /// See comments in Cloning.h. 
-BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, - const Twine &NameSuffix, Function *F, - ClonedCodeInfo *CodeInfo, - DebugInfoFinder *DIFinder, function_refInstSelect ) { +BasicBlock * +llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, + const Twine &NameSuffix, Function *F, + ClonedCodeInfo *CodeInfo, DebugInfoFinder *DIFinder, + function_ref InstSelect) { BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) NewBB->setName(BB->getName() + NameSuffix); @@ -53,7 +54,8 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, // Loop over all instructions, and copy them over. for (const Instruction &I : *BB) { - if (InstSelect && !InstSelect(&I)) continue; + if (InstSelect && !InstSelect(&I)) + continue; if (DIFinder && TheModule) DIFinder->processInstruction(*TheModule, I); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index f92e4420e54d5..3030dba9df158 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -199,7 +199,8 @@ static bool isBlockValidForExtraction(const BasicBlock &BB, /// Build a set of blocks to extract if the input blocks are viable. static SetVector buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, - bool AllowVarArgs, bool AllowAlloca, bool KeepOldBlocks) { + bool AllowVarArgs, bool AllowAlloca, + bool KeepOldBlocks) { assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector Result; @@ -230,20 +231,20 @@ buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, continue; } - -if (!KeepOldBlocks) { - // All blocks other than the first must not have predecessors outside of - // the subgraph which is being extracted. 
- for (auto *PBB : predecessors(BB)) - if (!Result.count(PBB)) { - LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from " - "outside the region except for the first block!\n" - << "Problematic source BB: " << BB->getName() << "\n" - << "Problematic destination BB: " << PBB->getName() - << "\n"); - return {}; - } -} + if (!KeepOldBlocks) { + // All blocks other than the first must not have predecessors outside of + // the subgraph which is being extracted. + for (auto *PBB : predecessors(BB)) + if (!Result.count(PBB)) { + LLVM_DEBUG(dbgs() + << "No blocks in this region may have entries from " + "outside the region except for the first block!\n" + << "Problematic source BB: " << BB->getName() << "\n" + << "Problematic destination BB: " << PBB->getName() + << "\n"); + return {}; + } + } } return Result; @@ -255,8 +256,10 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AllowVarArgs, bool AllowAlloca, std::string Suffix, bool KeepOldBlocks) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs), KeepOldBlocks(KeepOldBlocks), - Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca, KeepOldBlocks)), + BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs), + KeepOldBlocks(KeepOldBlocks), + Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca, + KeepOldBlocks)), Suffix(Suffix) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, @@ -264,10 +267,11 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BranchProbabilityInfo *BPI, AssumptionCache *AC, std::string Suffix) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllowVarArgs(false),KeepOldBlocks(false), + BPI(BPI), AC(AC), AllowVarArgs(false), KeepOldBlocks(false), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, /* AllowVarArgs */ false, - /* AllowAlloca */ false, /* KeepOldBlocks */ false)), + /* AllowAlloca 
*/ false, + /* KeepOldBlocks */ false)), Suffix(Suffix) {} /// definedInRegion - Return true if the specified value is defined in the @@ -996,12 +1000,11 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, } // Set swifterror parameter attributes. - if (!AggregateArgs) + if (!AggregateArgs) for (auto P : enumerate(inputs)) { if (P.value()->isSwiftError()) newFunction->addParamAttr(P.index(), Attribute::SwiftError); } - // Set names for input and output arguments. if (!AggregateArgs) { @@ -1016,318 +1019,301 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, } static void applyFirstDebugLoc(Function *oldFunction, - ArrayRef Blocks, - Instruction *BranchI) { - if (oldFunction->getSubprogram()) { - any_of(Blocks, [&BranchI](const BasicBlock *BB) { - return any_of(*BB, [&BranchI](const Instruction &I) { - if (!I.getDebugLoc()) - return false; - BranchI->setDebugLoc(I.getDebugLoc()); - return true; - }); - }); - } + ArrayRef Blocks, + Instruction *BranchI) { + if (oldFunction->getSubprogram()) { + any_of(Blocks, [&BranchI](const BasicBlock *BB) { + return any_of(*BB, [&BranchI](const Instruction &I) { + if (!I.getDebugLoc()) + return false; + BranchI->setDebugLoc(I.getDebugLoc()); + return true; + }); + }); + } } +void CodeExtractor::emitFunction(Function *newFunction, const ValueSet &inputs, + const ValueSet &outputs, BasicBlock *header, + const ValueSet &SinkingCands, + StructType *StructArgTy, + ArrayRef Orlder) { + Function *oldFunction = header->getParent(); + LLVMContext &Context = oldFunction->getContext(); -void CodeExtractor:: emitFunction( - Function *newFunction, - const ValueSet &inputs, const ValueSet &outputs, - BasicBlock *header - , const ValueSet &SinkingCands - ,StructType *StructArgTy - ,ArrayRef Orlder -) { - Function*oldFunction = header->getParent(); - LLVMContext & Context = oldFunction->getContext(); - - // The new function needs a root node because other nodes can branch to the - // head 
of the region, but the entry node of a function cannot have preds. - BasicBlock *newFuncRoot = - BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); - - ValueToValueMapTy VMap; - - SmallVector AdditionalRemap; - auto MoveOrCopyInst = - [this](Instruction *I, BasicBlock *IB, - BasicBlock::iterator IP) -> Instruction * { - if (KeepOldBlocks) { - auto AI = I->clone(); - AI->setName(I->getName()); - IB->getInstList().insert(IP, AI); - return AI; - } - I->moveBefore(*IB, IP); - return I; - }; + // The new function needs a root node because other nodes can branch to the + // head of the region, but the entry node of a function cannot have preds. + BasicBlock *newFuncRoot = + BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); - // Now sink all instructions which only have non-phi uses inside the region. - // Group the allocas at the start of the block, so that any bitcast uses of - // the allocas are well-defined. - - for (auto *II : SinkingCands) { - if (!isa(II)) { - auto New = MoveOrCopyInst(cast(II), newFuncRoot, - newFuncRoot->getFirstInsertionPt()); - if (KeepOldBlocks) { - AdditionalRemap.push_back(New); - VMap[II] = New; - } - } - } - for (auto *II : SinkingCands) { - if (auto *AI = dyn_cast(II)) { - AI = cast( - MoveOrCopyInst(AI, newFuncRoot, newFuncRoot->getFirstInsertionPt())); - if (KeepOldBlocks) { - AdditionalRemap.push_back(AI); - VMap[II] = AI; - } - } + ValueToValueMapTy VMap; + + SmallVector AdditionalRemap; + auto MoveOrCopyInst = [this](Instruction *I, BasicBlock *IB, + BasicBlock::iterator IP) -> Instruction * { + if (KeepOldBlocks) { + auto AI = I->clone(); + AI->setName(I->getName()); + IB->getInstList().insert(IP, AI); + return AI; } + I->moveBefore(*IB, IP); + return I; + }; + // Now sink all instructions which only have non-phi uses inside the region. + // Group the allocas at the start of the block, so that any bitcast uses of + // the allocas are well-defined. 
+ + for (auto *II : SinkingCands) { + if (!isa(II)) { + auto New = MoveOrCopyInst(cast(II), newFuncRoot, + newFuncRoot->getFirstInsertionPt()); + if (KeepOldBlocks) { + AdditionalRemap.push_back(New); + VMap[II] = New; + } + } + } + for (auto *II : SinkingCands) { + if (auto *AI = dyn_cast(II)) { + AI = cast( + MoveOrCopyInst(AI, newFuncRoot, newFuncRoot->getFirstInsertionPt())); + if (KeepOldBlocks) { + AdditionalRemap.push_back(AI); + VMap[II] = AI; + } + } + } + // Create an iterator to name all of the arguments we inserted. + Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. + SmallVector NewValues; + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); + Instruction *TI = newFunction->begin()->getTerminator(); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructArgTy->getElementType(i), GEP, + "loadgep_" + inputs[i]->getName(), TI); + } else + RewriteVal = &*AI++; + NewValues.push_back(RewriteVal); + } + for (auto &&P : enumerate(inputs)) { + VMap[P.value()] = NewValues[P.index()]; + } + //// Copy/Move code + /////////////////////////////////////////////////////////////////////////////// + if (KeepOldBlocks) { + for (BasicBlock *Block : Blocks) { + BasicBlock *CBB = CloneBasicBlock( + Block, VMap, {}, newFunction, /* CodeInfo */ nullptr, + /* DIFinder */ nullptr, + [](const Instruction *I) -> bool { return !isa(I); }); + + // Add basic block mapping. + VMap[Block] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. 
Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (Block->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(oldFunction, Block); + VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); + } - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); + // Note return instructions for the caller. + // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) + // Returns.push_back(RI); - // Rewrite all users of the inputs in the extracted region to use the - // arguments (or appropriate addressing into struct) instead. - SmallVector NewValues; - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); - Instruction *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructArgTy->getElementType(i), GEP, - "loadgep_" + inputs[i]->getName(), TI); - } else - RewriteVal = &*AI++; - - NewValues.push_back(RewriteVal); + for (auto &&P : CBB->phis()) { + auto NumIncoming = P.getNumIncomingValues(); + for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { + if (Blocks.count(P.getIncomingBlock(Idx))) + continue; + P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/false); + } + } } - for (auto &&P : enumerate(inputs)) { - VMap[P.value()] = NewValues[P.index()]; + for (auto Pred : predecessors(header)) { + if (VMap.count(Pred)) + continue; + VMap[Pred] = newFuncRoot; } - //// Copy/Move code - /////////////////////////////////////////////////////////////////////////////// + } else { 
+ moveCodeToFunction(newFunction); + if (!KeepOldBlocks) { + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal = NewValues[i]; + std::vector Users(inputs[i]->user_begin(), + inputs[i]->user_end()); + for (User *use : Users) + if (Instruction *inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } + } + } - if (KeepOldBlocks) { - for (BasicBlock *Block : Blocks) { - BasicBlock *CBB = CloneBasicBlock(Block, VMap, {}, - newFunction ,/* CodeInfo */ nullptr, /* DIFinder */ nullptr, - [](const Instruction*I)->bool { - return !isa(I); - } - ); - - // Add basic block mapping. - VMap[Block] = CBB; - - // It is only legal to clone a function if a block address within that - // function is never referenced outside of the function. Given that, we - // want to map block addresses from the old function to block addresses in - // the clone. (This is different from the generic ValueMapper - // implementation, which generates an invalid blockaddress when - // cloning a function.) - if (Block->hasAddressTaken()) { - Constant *OldBBAddr = BlockAddress::get(oldFunction, Block); - VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); - } - - // Note return instructions for the caller. 
- // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) - // Returns.push_back(RI); - - for (auto &&P : CBB->phis()) { - auto NumIncoming = P.getNumIncomingValues(); - for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { - if (Blocks.count(P.getIncomingBlock(Idx))) - continue; - P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/false); - } - } - } + std::map ExitBlockMap; + // for (auto OldTarget : OldTargets) { + // for (auto OldTarget : Orlder) { + for (auto &&P : enumerate(Orlder)) { + auto OldTarget = P.value(); + auto SuccNum = P.index(); - for (auto Pred : predecessors(header)) { - if (VMap.count(Pred)) - continue; - VMap[Pred] = newFuncRoot; - } + BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; + // if (NewTarget) + // continue; - } else { - moveCodeToFunction(newFunction); - - if (!KeepOldBlocks) { - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal = NewValues[i]; - - std::vector Users(inputs[i]->user_begin(), - inputs[i]->user_end()); - for (User *use : Users) - if (Instruction *inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } - } - } + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", + newFunction); + VMap[OldTarget] = NewTarget; + // auto SuccNum = ExitBlockSwitchIdx[OldTarget]; - std::map ExitBlockMap; - // for (auto OldTarget : OldTargets) { - // for (auto OldTarget : Orlder) { - for (auto &&P : enumerate( Orlder)) { - auto OldTarget = P.value(); - auto SuccNum = P.index(); + auto &Context = Blocks.front()->getContext(); + Value *brVal = nullptr; + assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); + switch (NumExitBlocks) { + case 0: + case 1: + break; // No value needed. 
+ case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } - BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - //if (NewTarget) - // continue; + ReturnInst::Create(Context, brVal, NewTarget); + } - // If we don't already have an exit stub for this non-extracted - // destination, create one now! - NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", newFunction); + for (BasicBlock *Block : Blocks) { + Instruction *TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; + BasicBlock *OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock *NewTarget = ExitBlockMap[OldTarget]; + assert(NewTarget && "Unknown target block!"); + + if (!KeepOldBlocks) { + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } else { VMap[OldTarget] = NewTarget; - - // auto SuccNum = ExitBlockSwitchIdx[OldTarget]; - - auto &Context = Blocks.front()->getContext(); - Value *brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: - break; // No value needed. 
- case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); - break; - } - - ReturnInst::Create(Context, brVal, NewTarget); + } } + } - for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock *OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock *NewTarget = ExitBlockMap[OldTarget]; - assert(NewTarget && "Unknown target block!"); - - if (!KeepOldBlocks) { - // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); - } else { - VMap[OldTarget] = NewTarget; - } - } - } + if (KeepOldBlocks) { + for (Instruction *II : AdditionalRemap) + RemapInstruction(II, VMap, RF_NoModuleLevelChanges); - if (KeepOldBlocks) { - for (Instruction *II : AdditionalRemap) - RemapInstruction(II, VMap, RF_NoModuleLevelChanges); - - // Loop over all of the instructions in the new function, fixing up operand - // references as we go. This uses VMap to do all the hard work. - for (BasicBlock *Block : Blocks) { - WeakTrackingVH NewBlock = VMap.lookup(Block); - if (!NewBlock) { - continue; - } - BasicBlock &Y = cast(*NewBlock); - - // Loop over all instructions, fixing each one as we find it... - - for (Instruction &II : Y) - RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); - } - } else { - // Loop over all of the PHI nodes in the header and exit blocks, and change - // any references to the old incoming edge to be the new incoming edge. 
- for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { - PHINode *PN = cast(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!Blocks.count(PN->getIncomingBlock(i))) - PN->setIncomingBlock(i, newFuncRoot); - } - } + // Loop over all of the instructions in the new function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (BasicBlock *Block : Blocks) { + WeakTrackingVH NewBlock = VMap.lookup(Block); + if (!NewBlock) { + continue; + } + BasicBlock &Y = cast(*NewBlock); - auto NewHeader = header; - if (KeepOldBlocks) - NewHeader = cast(VMap.lookup(NewHeader)); - assert(NewHeader); - auto *BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); + // Loop over all instructions, fixing each one as we find it... + for (Instruction &II : Y) + RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); + } + } else { + // Loop over all of the PHI nodes in the header and exit blocks, and change + // any references to the old incoming edge to be the new incoming edge. + for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!Blocks.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); + } + } - // Store the arguments right after the definition of output value. - // This should be proceeded after creating exit stubs to be ensure that invoke - // result restore will be placed in the outlined function. 
- Function::arg_iterator OAI = newFunction->arg_begin() + inputs.size(); - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto *OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; + auto NewHeader = header; + if (KeepOldBlocks) + NewHeader = cast(VMap.lookup(NewHeader)); + assert(NewHeader); + auto *BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); + + // Store the arguments right after the definition of output value. + // This should be proceeded after creating exit stubs to be ensure that invoke + // result restore will be placed in the outlined function. + Function::arg_iterator OAI = newFunction->arg_begin() + inputs.size(); + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto *OutI = dyn_cast(outputs[i]); + if (!OutI) + continue; - if (KeepOldBlocks) - OutI = cast(VMap.lookup(OutI)); - - // Find proper insertion point. - BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. Otherwise we insert the store right after OutI. - if (auto *InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); + if (KeepOldBlocks) + OutI = cast(VMap.lookup(OutI)); + + // Find proper insertion point. + BasicBlock::iterator InsertPt; + // In case OutI is an invoke, we insert the store at the beginning in the + // 'normal destination' BB. Otherwise we insert the store right after OutI. 
+ if (auto *InvokeI = dyn_cast(OutI)) + InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); + else if (auto *Phi = dyn_cast(OutI)) + InsertPt = Phi->getParent()->getFirstInsertionPt(); + else + InsertPt = std::next(OutI->getIterator()); - Instruction *InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || + Instruction *InsertBefore = &*InsertPt; + assert((InsertBefore->getFunction() == newFunction || Blocks.count(InsertBefore->getParent())) && - "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertBefore); - new StoreInst(OutI, GEP, InsertBefore); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. - } else { - new StoreInst(OutI, &*OAI, InsertBefore); - ++OAI; - } + "InsertPt should be in new function"); + assert(OAI != newFunction->arg_end() && + "Number of output arguments should match " + "the amount of defined values"); + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), + InsertBefore); + new StoreInst(OutI, GEP, InsertBefore); + // Since there should be only one struct argument aggregating + // all the output values, we shouldn't increment OAI, which always + // points to the struct argument, in this case. 
+ } else { + new StoreInst(OutI, &*OAI, InsertBefore); + ++OAI; } + } } - - /// Erase lifetime.start markers which reference inputs to the extraction /// region, and insert the referenced memory into \p LifetimesStart. /// @@ -1409,283 +1395,271 @@ static void insertLifetimeMarkersSurroundingCall( } } -CallInst * CodeExtractor:: emitReplacerCall( +CallInst *CodeExtractor::emitReplacerCall( Function *oldFunction, BasicBlock *header // NewHeader - , BasicBlock *ReplIP - , Function *newFunction - , const ValueSet &inputs, const ValueSet &outputs - , BlockFrequency EntryFreq - , StructType *StructArgTy - ,ArrayRef Orlder - , const SetVector &LifetimesStart - , std::vector &Reloads -) { - LLVMContext & Context = oldFunction->getContext(); - Module *M = oldFunction->getParent(); - const DataLayout &DL = M->getDataLayout(); - - // This takes place of the original loop - BasicBlock *codeReplacer = - BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, ReplIP); - BasicBlock *AllocaBlock = &oldFunction->front(); - - // Update the entry count of the function. 
- if (BFI) { - auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (Count.hasValue()) - newFunction->setEntryCount( - ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME - BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); - } - - // Add inputs as params, or to be filled into the struct - unsigned ArgNo = 0; - std::vector params; + , + BasicBlock *ReplIP, Function *newFunction, const ValueSet &inputs, + const ValueSet &outputs, BlockFrequency EntryFreq, StructType *StructArgTy, + ArrayRef Orlder, const SetVector &LifetimesStart, + std::vector &Reloads) { + LLVMContext &Context = oldFunction->getContext(); + Module *M = oldFunction->getParent(); + const DataLayout &DL = M->getDataLayout(); - AllocaInst *Struct = nullptr; - if (AggregateArgs && StructArgTy) { - std::vector StructValues; - for (Value *input : inputs) { - StructValues.push_back(input); - ++ArgNo; - } + // This takes place of the original loop + BasicBlock *codeReplacer = + BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, ReplIP); + BasicBlock *AllocaBlock = &oldFunction->front(); - Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", &AllocaBlock->front()); + // Update the entry count of the function. 
+ if (BFI) { + auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (Count.hasValue()) + newFunction->setEntryCount( + ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME + BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); + } - params.push_back(Struct); + // Add inputs as params, or to be filled into the struct + unsigned ArgNo = 0; + std::vector params; - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); - } + AllocaInst *Struct = nullptr; + if (AggregateArgs && StructArgTy) { + std::vector StructValues; + for (Value *input : inputs) { + StructValues.push_back(input); + ++ArgNo; } - std::vector ReloadOutputs; + Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", &AllocaBlock->front()); - if (!AggregateArgs) { - for (Value *input : inputs) { - params.push_back(input); - } + params.push_back(Struct); - // Create allocas for the outputs - for (Value *output : outputs) { - AllocaInst *alloca = - new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), nullptr, - output->getName() + ".loc", &AllocaBlock->front()); - ReloadOutputs.push_back(alloca); - params.push_back(alloca); - } + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + new StoreInst(StructValues[i], GEP, codeReplacer); } + } - // Emit the call to the function - CallInst 
*call = - CallInst::Create(newFunction, params, - NumExitBlocks > 1 ? "targetBlock" : "", codeReplacer); + std::vector ReloadOutputs; - // Set swifterror parameter attributes. - if (!AggregateArgs) { - for (auto &&P : enumerate(inputs)) { - if (P.value()->isSwiftError()) - call->addParamAttr(P.index(), Attribute::SwiftError); - } + if (!AggregateArgs) { + for (Value *input : inputs) { + params.push_back(input); } - // Add debug location to the new call, if the original function has debug - // info. In that case, the terminator of the entry block of the extracted - // function contains the first debug location of the extracted function, - // set in extractCodeRegion. - if (oldFunction->getSubprogram()) { - if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) - call->setDebugLoc(DL); + // Create allocas for the outputs + for (Value *output : outputs) { + AllocaInst *alloca = + new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), nullptr, + output->getName() + ".loc", &AllocaBlock->front()); + ReloadOutputs.push_back(alloca); + params.push_back(alloca); } + } - // Reload the outputs passed in by reference. - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = nullptr; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } else { - Output = ReloadOutputs[i]; - } - LoadInst *load = - new LoadInst(outputs[i]->getType(), Output, - outputs[i]->getName() + ".reload", codeReplacer); - Reloads.push_back(load); - } + // Emit the call to the function + CallInst *call = + CallInst::Create(newFunction, params, + NumExitBlocks > 1 ? 
"targetBlock" : "", codeReplacer); - // Now we can emit a switch statement using the call as a value. - SwitchInst *TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); + // Set swifterror parameter attributes. + if (!AggregateArgs) { + for (auto &&P : enumerate(inputs)) { + if (P.value()->isSwiftError()) + call->addParamAttr(P.index(), Attribute::SwiftError); + } + } - for (auto &&P : enumerate( Orlder)) { - auto OldTarget = P.value(); - auto SuccNum = P.index(); // ExitBlockSwitchIdx[OldTarget]; + // Add debug location to the new call, if the original function has debug + // info. In that case, the terminator of the entry block of the extracted + // function contains the first debug location of the extracted function, + // set in extractCodeRegion. + if (oldFunction->getSubprogram()) { + if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) + call->setDebugLoc(DL); + } - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), SuccNum), - OldTarget); + // Reload the outputs passed in by reference. + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *Output = nullptr; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } else { + Output = ReloadOutputs[i]; } + LoadInst *load = + new LoadInst(outputs[i]->getType(), Output, + outputs[i]->getName() + ".reload", codeReplacer); + Reloads.push_back(load); + } - // Now that we've done the deed, simplify the switch instruction. 
- Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); - switch (NumExitBlocks) { - case 0: - // There are no successors (the block containing the switch itself), which - // means that previously this was the last part of the function, and hence - // this should be rewritten as a `ret' - - // Check if the function should return a value - if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, nullptr, TheSwitch); // Return void - } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { - // return what we have - ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); - } else { - // Otherwise we must have code extracted an unwind or something, just - // return whatever we want. - ReturnInst::Create(Context, Constant::getNullValue(OldFnRetTy), - TheSwitch); - } + // Now we can emit a switch statement using the call as a value. + SwitchInst *TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); - TheSwitch->eraseFromParent(); - break; - case 1: - // Only a single destination, change the switch into an unconditional - // branch. - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); - TheSwitch->eraseFromParent(); - break; - case 2: - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), - call, TheSwitch); - TheSwitch->eraseFromParent(); - break; - default: - // Otherwise, make the default destination of the switch instruction be one - // of the other successors. - TheSwitch->setCondition(call); - TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); - // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks - 1)); - break; - } + for (auto &&P : enumerate(Orlder)) { + auto OldTarget = P.value(); + auto SuccNum = P.index(); // ExitBlockSwitchIdx[OldTarget]; - // Insert lifetime markers around the reloads of any output values. 
The - // allocas output values are stored in are only in-use in the codeRepl block. - insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), SuccNum), + OldTarget); + } - // Replicate the effects of any lifetime start/end markers which referenced - // input objects in the extraction region by placing markers around the call. - insertLifetimeMarkersSurroundingCall(M, LifetimesStart.getArrayRef(), {}, call); + // Now that we've done the deed, simplify the switch instruction. + Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy->isVoidTy()) { + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + } else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. + ReturnInst::Create(Context, Constant::getNullValue(OldFnRetTy), + TheSwitch); + } - return call; -} + TheSwitch->eraseFromParent(); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->eraseFromParent(); + break; + case 2: + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->eraseFromParent(); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. 
+ TheSwitch->setCondition(call); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks - 1)); + break; + } + // Insert lifetime markers around the reloads of any output values. The + // allocas output values are stored in are only in-use in the codeRepl block. + insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); -void CodeExtractor:: insertReplacerCall( - Function *oldFunction, - BasicBlock *header, - BasicBlock *codeReplacer - , const ValueSet &outputs - , ArrayRefReloads - , const DenseMap &ExitWeights -){ - // Rewrite branches to basic blocks outside of the loop to new dummy blocks - // within the new function. This must be done before we lose track of which - // blocks were originally in the code region. - std::vector Users(header->user_begin(), header->user_end()); - for (auto &U : Users) // FIXME: KeepOldBlocks? - // The BasicBlock which contains the branch is not in - // the region modify the branch target to a new block - if (Instruction *I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction) - I->replaceUsesOfWith(header, codeReplacer); + // Replicate the effects of any lifetime start/end markers which referenced + // input objects in the extraction region by placing markers around the call. + insertLifetimeMarkersSurroundingCall(M, LifetimesStart.getArrayRef(), {}, + call); + return call; +} - if (KeepOldBlocks) { - // Must be done after remap - SSAUpdater SSA; - for (auto P : enumerate(outputs)) { - auto OutIdx = P.index(); - auto OldVal = cast(P.value()); - auto NewVal = Reloads[OutIdx]; - - SSA.Initialize(OldVal->getType(), - (OldVal->getName() + ".merge_with_extracted").str()); - SSA.AddAvailableValue(codeReplacer, NewVal); - - // Could help SSAUpdater by determining in advance which output values are - // available in which exit blocks (from DT). 
- SSA.AddAvailableValue(OldVal->getParent(), OldVal); - - for (auto &&U : make_early_inc_range(OldVal->uses())) { - auto User = dyn_cast(U.getUser()); - if (!User) - continue; - auto EffectiveUser = User->getParent(); - if (auto &&P = dyn_cast(User)) { - EffectiveUser = P->getIncomingBlock(U); - } - - if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) - continue; - - SSA.RewriteUseAfterInsertions(U); - } - } - } else { - for (BasicBlock *ExitBB : ExitBlocks) - for (PHINode &PN : ExitBB->phis()) { - Value *IncomingCodeReplacerVal = nullptr; - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - // Ignore incoming values from outside of the extracted region. - if (!Blocks.count(PN.getIncomingBlock(i))) - continue; - - // Ensure that there is only one incoming value from codeReplacer. - if (!IncomingCodeReplacerVal) { - PN.setIncomingBlock(i, codeReplacer); - IncomingCodeReplacerVal = PN.getIncomingValue(i); - } else - assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && - "PHI has two incompatbile incoming values from codeRepl"); - } - } - - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto load = Reloads[i]; - - std::vector Users(outputs[i]->user_begin(), - outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast(Users[u]); - if (inst->getParent()->getParent() == oldFunction) - inst->replaceUsesOfWith(outputs[i], load); - } +void CodeExtractor::insertReplacerCall( + Function *oldFunction, BasicBlock *header, BasicBlock *codeReplacer, + const ValueSet &outputs, ArrayRef Reloads, + const DenseMap &ExitWeights) { + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector Users(header->user_begin(), header->user_end()); + for (auto &U : Users) // FIXME: KeepOldBlocks? 
+ // The BasicBlock which contains the branch is not in + // the region modify the branch target to a new block + if (Instruction *I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction) + I->replaceUsesOfWith(header, codeReplacer); + + if (KeepOldBlocks) { + // Must be done after remap + SSAUpdater SSA; + for (auto P : enumerate(outputs)) { + auto OutIdx = P.index(); + auto OldVal = cast(P.value()); + auto NewVal = Reloads[OutIdx]; + + SSA.Initialize(OldVal->getType(), + (OldVal->getName() + ".merge_with_extracted").str()); + SSA.AddAvailableValue(codeReplacer, NewVal); + + // Could help SSAUpdater by determining in advance which output values are + // available in which exit blocks (from DT). + SSA.AddAvailableValue(OldVal->getParent(), OldVal); + + for (auto &&U : make_early_inc_range(OldVal->uses())) { + auto User = dyn_cast(U.getUser()); + if (!User) + continue; + auto EffectiveUser = User->getParent(); + if (auto &&P = dyn_cast(User)) { + EffectiveUser = P->getIncomingBlock(U); } + + if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) + continue; + + SSA.RewriteUseAfterInsertions(U); + } } + } else { + for (BasicBlock *ExitBB : ExitBlocks) + for (PHINode &PN : ExitBB->phis()) { + Value *IncomingCodeReplacerVal = nullptr; + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // Ignore incoming values from outside of the extracted region. + if (!Blocks.count(PN.getIncomingBlock(i))) + continue; - // Update the branch weights for the exit block. - if (BFI && NumExitBlocks > 1) - calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); + // Ensure that there is only one incoming value from codeReplacer. 
+ if (!IncomingCodeReplacerVal) { + PN.setIncomingBlock(i, codeReplacer); + IncomingCodeReplacerVal = PN.getIncomingValue(i); + } else + assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && + "PHI has two incompatbile incoming values from codeRepl"); + } + } + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto load = Reloads[i]; + + std::vector Users(outputs[i]->user_begin(), + outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction *inst = cast(Users[u]); + if (inst->getParent()->getParent() == oldFunction) + inst->replaceUsesOfWith(outputs[i], load); + } + } + } + // Update the branch weights for the exit block. + if (BFI && NumExitBlocks > 1) + calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); } - void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function *oldFunc = (*Blocks.begin())->getParent(); Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); @@ -1707,7 +1681,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { void CodeExtractor::calculateNewCallTerminatorWeights( BasicBlock *CodeReplacer, - const DenseMap &ExitWeights, + const DenseMap &ExitWeights, BranchProbabilityInfo *BPI) { using Distribution = BlockFrequencyInfoImplBase::Distribution; using BlockNode = BlockFrequencyInfoImplBase::BlockNode; @@ -1725,7 +1699,7 @@ void CodeExtractor::calculateNewCallTerminatorWeights( // Add each of the frequencies of the successors. 
for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) { BlockNode ExitNode(i); - uint64_t ExitFreq = ExitWeights .lookup(TI->getSuccessor(i)).getFrequency(); + uint64_t ExitFreq = ExitWeights.lookup(TI->getSuccessor(i)).getFrequency(); if (ExitFreq != 0) BranchDist.addExit(ExitNode, ExitFreq); else @@ -1881,8 +1855,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { return extractCodeRegion(CEAC, Inputs, Outputs); } - - void CodeExtractor::recomputeExitBlocks() { OldTargets.clear(); ExitBlocks.clear(); @@ -1899,44 +1871,47 @@ void CodeExtractor::recomputeExitBlocks() { NumExitBlocks = ExitBlocks.size(); } +void CodeExtractor::canonicalizeCFGForExtraction(BasicBlock *&Header, + bool NoExitBlockPHIs) { + // If we have any return instructions in the region, split those blocks so + // that the return is not in the region. + splitReturnBlocks(); + + // If we have to split PHI nodes of the entry or exit blocks, do so now. + severSplitPHINodesOfEntry(Header); + + // If a PHI in an exit block has multiple invoming values from the outlined + // region, create a new PHI for those values within the region such that only + // PHI itself becomes an output value, not each of its incoming values + // individually. + recomputeExitBlocks(); + severSplitPHINodesOfExits(); + + // If the option was given, ensure there are no PHI nodes at all in the exit + // nodes themselves. + if (NoExitBlockPHIs) { + for (BasicBlock *Block : Blocks) { + for (BasicBlock *Succ : make_early_inc_range(successors(Block))) { + if (Blocks.count(Succ)) + continue; -void CodeExtractor::canonicalizeCFGForExtraction(BasicBlock *&Header, bool NoExitBlockPHIs) { - // If we have any return instructions in the region, split those blocks so - // that the return is not in the region. - splitReturnBlocks(); - - // If we have to split PHI nodes of the entry or exit blocks, do so now. 
- severSplitPHINodesOfEntry(Header); + if (!Succ->getSinglePredecessor()) + Succ = SplitEdge(Block, Succ, DT); - // If a PHI in an exit block has multiple invoming values from the outlined region, create a new PHI for those values within the region such that only PHI itself becomes an output value, not each of its incoming values individually. - recomputeExitBlocks(); - severSplitPHINodesOfExits(); - - // If the option was given, ensure there are no PHI nodes at all in the exit nodes themselves. - if (NoExitBlockPHIs) { - for (BasicBlock *Block : Blocks) { - for (BasicBlock *Succ : make_early_inc_range( successors(Block))) { - if (Blocks.count(Succ)) - continue; - - if (!Succ->getSinglePredecessor()) - Succ = SplitEdge(Block, Succ, DT); - - - // Ensure no PHI node in exit block (still possible with single - // predecessor, e.g. LCSSA) - while (auto *P = dyn_cast(&Succ->front())) { - assert(P->getNumIncomingValues() == 1); - P->replaceAllUsesWith(P->getIncomingValue(0)); - P->eraseFromParent(); - } - } + // Ensure no PHI node in exit block (still possible with single + // predecessor, e.g. LCSSA) + while (auto *P = dyn_cast(&Succ->front())) { + assert(P->getNumIncomingValues() == 1); + P->replaceAllUsesWith(P->getIncomingValue(0)); + P->eraseFromParent(); } - - // Exit nodes may have changed by SplitEdge. -// TODO: Preserve BPI/BFI for ExitBlocks (so should splitReturnBlocks()) - recomputeExitBlocks(); + } } + + // Exit nodes may have changed by SplitEdge. 
+ // TODO: Preserve BPI/BFI for ExitBlocks (so should splitReturnBlocks()) + recomputeExitBlocks(); + } } Function * @@ -1955,10 +1930,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, canonicalizeCFGForExtraction(header, KeepOldBlocks); - - - - if (!KeepOldBlocks) { // Transforms/HotColdSplit/stale-assume-in-original-func.ll // Remove @llvm.assume calls that will be moved to the new function from the @@ -1983,7 +1954,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Find inputs to, outputs from the code region. findInputsOutputs(inputs, outputs, SinkingCands); - // Collect objects which are inputs to the extraction region and also // referenced by lifetime start markers within it. The effects of these // markers must be replicated in the calling function to prevent the stack @@ -1991,99 +1961,92 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ValueSet LifetimesStart; eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); - - if (!HoistingCands.empty()) { - auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); - Instruction *TI = HoistToBlock->getTerminator(); - for (auto *II : HoistingCands) - cast(II)->moveBefore(TI); - recomputeExitBlocks(); + auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + Instruction *TI = HoistToBlock->getTerminator(); + for (auto *II : HoistingCands) + cast(II)->moveBefore(TI); + recomputeExitBlocks(); } - // CFG/ExitBlocks fixed after here - // Calculate the entry frequency of the new function before we change the root // block. 
BlockFrequency EntryFreq; DenseMap ExitWeights; if (BFI) { - assert(BPI && "Both BPI and BFI are required to preserve profile info"); - for (BasicBlock *Pred : predecessors(header)) { - if (Blocks.count(Pred)) - continue; - EntryFreq += - BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); - } + assert(BPI && "Both BPI and BFI are required to preserve profile info"); + for (BasicBlock *Pred : predecessors(header)) { + if (Blocks.count(Pred)) + continue; + EntryFreq += + BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); + } - for (BasicBlock *Succ : ExitBlocks) { - for (BasicBlock *Block : predecessors(Succ)) { - if (!Blocks.count(Block)) - continue; + for (BasicBlock *Succ : ExitBlocks) { + for (BasicBlock *Block : predecessors(Succ)) { + if (!Blocks.count(Block)) + continue; - BlockFrequency &BF = ExitWeights[Succ]; - BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); - } + BlockFrequency &BF = ExitWeights[Succ]; + BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); } + } } - // Determine position for the replacement code. Do so before header is moved to the new function. - BasicBlock* ReplIP = header; + // Determine position for the replacement code. Do so before header is moved + // to the new function. 
+ BasicBlock *ReplIP = header; if (!KeepOldBlocks) { - while (ReplIP && Blocks.count(ReplIP)) { - ReplIP = ReplIP->getNextNode(); - } + while (ReplIP && Blocks.count(ReplIP)) { + ReplIP = ReplIP->getNextNode(); + } } SmallDenseMap ExitBlockSwitchIdx; SmallVector Orlder; for (BasicBlock *OldTarget : OldTargets) { - if (Blocks.count(OldTarget)) - continue; + if (Blocks.count(OldTarget)) + continue; - auto Added = - ExitBlockSwitchIdx.insert({OldTarget, ExitBlockSwitchIdx.size()}); - if (Added.second) - Orlder.push_back(OldTarget); + auto Added = + ExitBlockSwitchIdx.insert({OldTarget, ExitBlockSwitchIdx.size()}); + if (Added.second) + Orlder.push_back(OldTarget); } - - - // Construct new function based on inputs/outputs & add allocas for all defs. Function *newFunction = constructFunctionDeclaration(inputs, outputs, header); - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); unsigned FirstOut = inputs.size(); if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); + std::advance(OutputArgBegin, inputs.size()); StructType *StructArgTy = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) - StructArgTy = cast(newFunction->getArg(0)->getType()); - - + StructArgTy = cast(newFunction->getArg(0)->getType()); //// CodeGen newFunction implementation ////////////////////////////////////////////////////// - emitFunction( newFunction, inputs, outputs, header, SinkingCands, StructArgTy, Orlder ); + emitFunction(newFunction, inputs, outputs, header, SinkingCands, StructArgTy, + Orlder); //// Codegen newFunction call replacement ///////////////////////////////////////////////// - std::vectorReloads; - CallInst * call = emitReplacerCall(oldFunction, header, ReplIP, newFunction, inputs, outputs, EntryFreq, StructArgTy, Orlder, LifetimesStart,Reloads); + std::vector Reloads; + CallInst *call = emitReplacerCall(oldFunction, header, ReplIP, newFunction, + inputs, outputs, EntryFreq, StructArgTy, + Orlder, LifetimesStart, Reloads); BasicBlock 
*codeReplacer = call->getParent(); - //// Connect call replacement to CFG /////////////////////////////////////////////////////////////////////////// - insertReplacerCall(oldFunction, header, codeReplacer, outputs, Reloads, ExitWeights); - + insertReplacerCall(oldFunction, header, codeReplacer, outputs, Reloads, + ExitWeights); fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *call); From 90925073f6ed360d14962a923770df6a425c743b Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 17:39:47 -0600 Subject: [PATCH 085/130] WIP --- llvm/include/llvm/Transforms/Utils/Cloning.h | 2 + .../llvm/Transforms/Utils/CodeExtractor.h | 2 - llvm/lib/Transforms/Utils/CodeExtractor.cpp | 10 +- .../tools/llvm-extract/extract-block-sink2.ll | 95 ------------------- llvm/tools/llvm-extract/llvm-extract.cpp | 6 +- 5 files changed, 8 insertions(+), 107 deletions(-) delete mode 100644 llvm/test/tools/llvm-extract/extract-block-sink2.ll diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index f9371c67d76ff..e46de638466e1 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -114,6 +114,8 @@ struct ClonedCodeInfo { /// If you would like to collect additional information about the cloned /// function, you can specify a ClonedCodeInfo object with the optional fifth /// parameter. +/// +/// If you would like to clone only a subset of instructions in the basic block, you can specify a callback returning true only for those instructions that are to be cloned. 
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix = "", Function *F = nullptr, diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index efa19712d1e84..9084dd04dab60 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -18,7 +18,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Transforms/Utils/ValueMapper.h" #include namespace llvm { @@ -38,7 +37,6 @@ class Module; class Type; class Value; class StructType; -class LoadInst; /// A cache for the CodeExtractor analysis. The operation \ref /// CodeExtractor::extractCodeRegion is guaranteed not to invalidate this diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 3030dba9df158..0fa6377ebf00c 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1925,8 +1925,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BasicBlock *header = *Blocks.begin(); Function *oldFunction = header->getParent(); Module *M = oldFunction->getParent(); - LLVMContext &Context = M->getContext(); - const DataLayout &DL = M->getDataLayout(); + + canonicalizeCFGForExtraction(header, KeepOldBlocks); @@ -2019,10 +2019,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Construct new function based on inputs/outputs & add allocas for all defs. 
Function *newFunction = constructFunctionDeclaration(inputs, outputs, header); - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); + + StructType *StructArgTy = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) diff --git a/llvm/test/tools/llvm-extract/extract-block-sink2.ll b/llvm/test/tools/llvm-extract/extract-block-sink2.ll deleted file mode 100644 index 55b1aba77d184..0000000000000 --- a/llvm/test/tools/llvm-extract/extract-block-sink2.ll +++ /dev/null @@ -1,95 +0,0 @@ -; RUN: llvm-extract -S -bb "foo:region_start" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s - - - -; CHECK-LABEL: define void @foo(i1 %c) { -; CHECK-NEXT: entry: -; CHECK-NEXT: %a = alloca i32, align 4 -; CHECK-NEXT: %b = alloca i32, align 4 -; CHECK-NEXT: %A = alloca i32, align 4 -; CHECK-NEXT: %B = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %A) -; CHECK-NEXT: br i1 %c, label %codeRepl, label %outsideonly -; CHECK-EMPTY: -; CHECK-NEXT: outsideonly: -; CHECK-NEXT: store i32 41, i32* %b, align 4 -; CHECK-NEXT: store i32 42, i32* %A, align 4 -; CHECK-NEXT: br label %return -; CHECK-EMPTY: -; CHECK-NEXT: codeRepl: -; CHECK-NEXT: call void @foo.region_start(i32* %a, i32* %b, i32* %A) -; CHECK-NEXT: br label %region_start.split -; CHECK-EMPTY: -; CHECK-NEXT: region_start: -; CHECK-NEXT: store i32 43, i32* %a, align 4 -; CHECK-NEXT: store i32 44, i32* %b, align 4 -; CHECK-NEXT: store i32 45, i32* %A, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) -; CHECK-NEXT: store i32 46, i32* %B, align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) -; CHECK-NEXT: br label %region_start.split -; CHECK-EMPTY: -; CHECK-NEXT: region_start.split: -; CHECK-NEXT: br label %return -; CHECK-EMPTY: -; CHECK-NEXT: return: -; CHECK-NEXT: call void 
@llvm.lifetime.end.p0i32(i64 4, i32* nonnull %A) -; CHECK-NEXT: ret void -; CHECK-NEXT: } - - -; CHECK-LABEL: define internal void @foo.region_start(i32* %a, i32* %b, i32* %A) { -; CHECK-NEXT: newFuncRoot: -; CHECK-NEXT: %B = alloca i32, align 4 -; CHECK-NEXT: br label %region_start -; CHECK-EMPTY: -; CHECK-NEXT: region_start: -; CHECK-NEXT: store i32 43, i32* %a, align 4 -; CHECK-NEXT: store i32 44, i32* %b, align 4 -; CHECK-NEXT: store i32 45, i32* %A, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) -; CHECK-NEXT: store i32 46, i32* %B, align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) -; CHECK-NEXT: br label %region_start.split.exitStub -; CHECK-EMPTY: -; CHECK-NEXT: region_start.split.exitStub: -; CHECK-NEXT: ret void -; CHECK-NEXT: } - - - - - - - - -declare void @llvm.lifetime.start.p0i32(i64, i32* nocapture) -declare void @llvm.lifetime.end.p0i32(i64, i32* nocapture) - -define void @foo(i1 %c) { -entry: - %a = alloca i32, align 4 - %b = alloca i32, align 4 - %A = alloca i32, align 4 - %B = alloca i32, align 4 - call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %A) - br i1 %c, label %region_start, label %outsideonly - -outsideonly: - store i32 41, i32* %b - store i32 42, i32* %A - br label %return - -region_start: - store i32 43, i32* %a - store i32 44, i32* %b - store i32 45, i32* %A - call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %B) - store i32 46, i32* %B - call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %B) - br label %return - -return: - call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %A) - ret void -} diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index 44f96bdd85a73..8bf931ef4030e 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -84,8 +84,7 @@ static cl::list ExtractBlocks( "Specify pairs to extract.\n" "Each pair will create a function.\n" "If 
multiple basic blocks are specified in one pair,\n" - "the first block in the sequence should dominate the rest (unless " - "using --bb-keep-blocks)." + "the first block in the sequence should dominate the rest.\n" "eg:\n" " --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n" " --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one " @@ -102,8 +101,7 @@ static cl::opt KeepFunctions( static cl::opt KeepBlocks("bb-keep-blocks", - cl::desc("Keep extracted blocks in original function after " - "outlining, likely orphaned."), + cl::desc("Keep extracted blocks in original function after outlining. This permits branches to any selected basic block from outside the selection and overlapping code regions, but only branches to the first in the group will call the extracted function."), cl::cat(ExtractCat)); // ExtractAlias - The alias to extract from the module. From 8e80131b333441f28df5921e4d17af693d3601c6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 18:16:56 -0600 Subject: [PATCH 086/130] cleaning up --- .../llvm/Transforms/Utils/CodeExtractor.h | 54 +++++++++------ llvm/lib/Transforms/Utils/CodeExtractor.cpp | 66 +++++++++---------- llvm/tools/llvm-extract/llvm-extract.cpp | 2 +- 3 files changed, 69 insertions(+), 53 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 9084dd04dab60..57d15ef3fc757 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -96,17 +96,26 @@ class CodeExtractorAnalysisCache { // If true, varargs functions can be extracted. bool AllowVarArgs; +/// If true, copies the code into the extracted function instead of moving it. bool KeepOldBlocks; // Bits of intermediate state computed at various phases of extraction. SetVector Blocks; + +/// Same as ExitBlocks.size(). 
unsigned NumExitBlocks = std::numeric_limits::max(); + Type *RetTy; - // Mapping from the original exit blocks, to the new blocks inside - // the function. - SmallVector OldTargets; + + + /// Lists of blocks that are branched from the code region to be extracted. ExitBlocks contains each block at most once. OldTargets is used to determine the extracted function's return code (so its order must be deterministic) but may contain the same block multiple times if branched to from multiple blocks. +/// @{ SmallPtrSet ExitBlocks; + SmallVector OldTargets; +/// @} + + // Suffix to use when creating extracted function (appended to the original // function name + "."). If empty, the default is to use the entry block @@ -124,6 +133,8 @@ class CodeExtractorAnalysisCache { /// code is extracted, including vastart. If AllowAlloca is true, then /// extraction of blocks containing alloca instructions would be possible, /// however code extractor won't validate whether extraction is legal. +/// + /// If KeepOldBlocks is true, the original instances of the extracted region remains in the original function so they can still be branched to from non-extracted blocks. However, only branches to the first block will call the extracted function. CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr, @@ -156,10 +167,8 @@ class CodeExtractorAnalysisCache { /// newly outlined function. /// \param Outputs [out] - filled with values marked as outputs to the /// newly outlined function. - /// \param KeepOldBlocks If true, the original instances of the extracted - /// region remain; instead of moving them to the new function they are - /// copied. \returns zero when called on a CodeExtractor instance where - /// isEligible returns false. + /// \returns zero when called on a CodeExtractor instance where isEligible + /// returns false. 
Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ValueSet &Inputs, ValueSet &Outputs); @@ -232,44 +241,51 @@ class CodeExtractorAnalysisCache { getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, Instruction *Addr, BasicBlock *ExitBlock) const; +/// Updates the list of exit blocks (OldTargets and ExitBlocks) after changes of the control flow or the Blocks list. void recomputeExitBlocks(); void severSplitPHINodesOfEntry(BasicBlock *&Header); void severSplitPHINodesOfExits(); void splitReturnBlocks(); - void canonicalizeCFGForExtraction(BasicBlock *&Header, + + + void moveCodeToFunction(Function *newFunction); + + void calculateNewCallTerminatorWeights( + BasicBlock *CodeReplacer, + const DenseMap &ExitWeights, + BranchProbabilityInfo *BPI); + + /// Normalizes the control flow of the extracted regions, such as ensuring that the extracted region does not contain a return instruction. + void normalizeCFGForExtraction(BasicBlock *&Header, bool NoExitBlockPHIs); +/// Generates the function declaration for the function containing the extracted code. Function *constructFunctionDeclaration(const ValueSet &inputs, - const ValueSet &outputs, - BasicBlock *header); + const ValueSet &outputs, const Twine &Name); +/// Generates the extracted function's code. void emitFunction(Function *newFunction, const ValueSet &inputs, const ValueSet &outputs, BasicBlock *header, const ValueSet &SinkingCands, StructType *StructArgTy, - ArrayRef Orlder); + ArrayRef SwichCases); +/// Generates a Basic Block that calls the extracted function. CallInst *emitReplacerCall(Function *oldFunction, BasicBlock *header, BasicBlock *ReplIP, Function *newFunction, const ValueSet &inputs, const ValueSet &outputs, BlockFrequency EntryFreq, StructType *StructArgTy, - ArrayRef Orlder, + ArrayRef SwichCases, const SetVector &LifetimesStart, std::vector &Reloads); +/// Connects the basic block containing the call to the extracted function into the original function's control flow. 
void insertReplacerCall( Function *oldFunction, BasicBlock *header, BasicBlock *codeReplacer, const ValueSet &outputs, ArrayRef Reloads, const DenseMap &ExitWeights); - - void moveCodeToFunction(Function *newFunction); - - void calculateNewCallTerminatorWeights( - BasicBlock *CodeReplacer, - const DenseMap &ExitWeights, - BranchProbabilityInfo *BPI); }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 0fa6377ebf00c..f727ea51a9939 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -231,9 +231,9 @@ buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, continue; } - if (!KeepOldBlocks) { // All blocks other than the first must not have predecessors outside of - // the subgraph which is being extracted. + // the subgraph which is being extracted. KeepOldBlocks relaxes this requirement. + if (!KeepOldBlocks) { for (auto *PBB : predecessors(BB)) if (!Result.count(PBB)) { LLVM_DEBUG(dbgs() @@ -823,20 +823,20 @@ void CodeExtractor::splitReturnBlocks() { /// constructFunction - make a function based on inputs and outputs, as follows: /// f(in0, ..., inN, out0, ..., outN) Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, - const ValueSet &outputs, - BasicBlock *header) { + const ValueSet &outputs,const Twine &Name) { LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); - Function *oldFunction = header->getParent(); - Module *M = oldFunction->getParent(); + Function *oldFunction = Blocks.front() ->getParent(); + LLVMContext &Context = oldFunction ->getContext(); + Module *M = Blocks.front() ->getModule(); // This function returns unsigned, outputs will go back by reference. 
switch (NumExitBlocks) { case 0: - case 1: RetTy = Type::getVoidTy(header->getContext()); break; - case 2: RetTy = Type::getInt1Ty(header->getContext()); break; - default: RetTy = Type::getInt16Ty(header->getContext()); break; + case 1: RetTy = Type::getVoidTy(Context); break; + case 2: RetTy = Type::getInt1Ty(Context); break; + default: RetTy = Type::getInt16Ty(Context); break; } std::vector paramTy; @@ -873,14 +873,11 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, FunctionType::get(RetTy, paramTy, AllowVarArgs && oldFunction->isVarArg()); - std::string SuffixToUse = - Suffix.empty() - ? (header->getName().empty() ? "extracted" : header->getName().str()) - : Suffix; + // Create the new function Function *newFunction = Function::Create( funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), - oldFunction->getName() + "." + SuffixToUse, M); + Name, M); // If the old function is no-throw, so is the new one. if (oldFunction->doesNotThrow()) newFunction->setDoesNotThrow(); @@ -999,15 +996,15 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, newFunction->addFnAttr(Attr); } - // Set swifterror parameter attributes. - if (!AggregateArgs) - for (auto P : enumerate(inputs)) { + // Set parameter attributes. + if (!AggregateArgs) { + // Set swifterror parameter attributes. + for (auto P : enumerate(inputs)) if (P.value()->isSwiftError()) newFunction->addParamAttr(P.index(), Attribute::SwiftError); - } + // Set names for input and output arguments. 
- if (!AggregateArgs) { Function::arg_iterator AI = newFunction->arg_begin(); for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) AI->setName(inputs[i]->getName()); @@ -1018,6 +1015,7 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, return newFunction; } + static void applyFirstDebugLoc(Function *oldFunction, ArrayRef Blocks, Instruction *BranchI) { @@ -1037,7 +1035,7 @@ void CodeExtractor::emitFunction(Function *newFunction, const ValueSet &inputs, const ValueSet &outputs, BasicBlock *header, const ValueSet &SinkingCands, StructType *StructArgTy, - ArrayRef Orlder) { + ArrayRef SwichCases) { Function *oldFunction = header->getParent(); LLVMContext &Context = oldFunction->getContext(); @@ -1175,9 +1173,7 @@ void CodeExtractor::emitFunction(Function *newFunction, const ValueSet &inputs, } std::map ExitBlockMap; - // for (auto OldTarget : OldTargets) { - // for (auto OldTarget : Orlder) { - for (auto &&P : enumerate(Orlder)) { + for (auto &&P : enumerate(SwichCases)) { auto OldTarget = P.value(); auto SuccNum = P.index(); @@ -1401,7 +1397,7 @@ CallInst *CodeExtractor::emitReplacerCall( , BasicBlock *ReplIP, Function *newFunction, const ValueSet &inputs, const ValueSet &outputs, BlockFrequency EntryFreq, StructType *StructArgTy, - ArrayRef Orlder, const SetVector &LifetimesStart, + ArrayRef SwichCases, const SetVector &LifetimesStart, std::vector &Reloads) { LLVMContext &Context = oldFunction->getContext(); Module *M = oldFunction->getParent(); @@ -1513,7 +1509,7 @@ CallInst *CodeExtractor::emitReplacerCall( SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), codeReplacer, 0, codeReplacer); - for (auto &&P : enumerate(Orlder)) { + for (auto &&P : enumerate(SwichCases)) { auto OldTarget = P.value(); auto SuccNum = P.index(); // ExitBlockSwitchIdx[OldTarget]; @@ -1871,7 +1867,7 @@ void CodeExtractor::recomputeExitBlocks() { NumExitBlocks = ExitBlocks.size(); } -void 
CodeExtractor::canonicalizeCFGForExtraction(BasicBlock *&Header, +void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&Header, bool NoExitBlockPHIs) { // If we have any return instructions in the region, split those blocks so // that the return is not in the region. @@ -1924,11 +1920,11 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // block in the region. BasicBlock *header = *Blocks.begin(); Function *oldFunction = header->getParent(); - Module *M = oldFunction->getParent(); - canonicalizeCFGForExtraction(header, KeepOldBlocks); + + normalizeCFGForExtraction(header, KeepOldBlocks); if (!KeepOldBlocks) { // Transforms/HotColdSplit/stale-assume-in-original-func.ll @@ -2005,7 +2001,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, } SmallDenseMap ExitBlockSwitchIdx; - SmallVector Orlder; + SmallVector SwichCases; for (BasicBlock *OldTarget : OldTargets) { if (Blocks.count(OldTarget)) continue; @@ -2013,11 +2009,15 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, auto Added = ExitBlockSwitchIdx.insert({OldTarget, ExitBlockSwitchIdx.size()}); if (Added.second) - Orlder.push_back(OldTarget); + SwichCases.push_back(OldTarget); } // Construct new function based on inputs/outputs & add allocas for all defs. - Function *newFunction = constructFunctionDeclaration(inputs, outputs, header); + std::string SuffixToUse = + Suffix.empty() + ? (header->getName().empty() ? "extracted" : header->getName().str()) + : Suffix; + Function *newFunction = constructFunctionDeclaration(inputs, outputs, oldFunction->getName() + "." 
+ SuffixToUse ); @@ -2030,14 +2030,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, ////////////////////////////////////////////////////// emitFunction(newFunction, inputs, outputs, header, SinkingCands, StructArgTy, - Orlder); + SwichCases); //// Codegen newFunction call replacement ///////////////////////////////////////////////// std::vector Reloads; CallInst *call = emitReplacerCall(oldFunction, header, ReplIP, newFunction, inputs, outputs, EntryFreq, StructArgTy, - Orlder, LifetimesStart, Reloads); + SwichCases, LifetimesStart, Reloads); BasicBlock *codeReplacer = call->getParent(); //// Connect call replacement to CFG diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index 8bf931ef4030e..46542a983127c 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -84,7 +84,7 @@ static cl::list ExtractBlocks( "Specify pairs to extract.\n" "Each pair will create a function.\n" "If multiple basic blocks are specified in one pair,\n" - "the first block in the sequence should dominate the rest.\n" + "the first block in the sequence should dominate the rest (Unlsess using --bb-keep-blocks).\n" "eg:\n" " --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n" " --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one " From f723df590ad4703fd569d4b49f3fa60cdad82a19 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 18:56:31 -0600 Subject: [PATCH 087/130] fixing PHIs --- .../llvm/Transforms/Utils/CodeExtractor.h | 22 ++-- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 110 +++++++++--------- 2 files changed, 67 insertions(+), 65 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 57d15ef3fc757..184caa66f2f23 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ 
-263,21 +263,21 @@ class CodeExtractorAnalysisCache { /// Generates the function declaration for the function containing the extracted code. Function *constructFunctionDeclaration(const ValueSet &inputs, - const ValueSet &outputs, const Twine &Name); + const ValueSet &outputs, BlockFrequency EntryFreq, const Twine &Name); -/// Generates the extracted function's code. - void emitFunction(Function *newFunction, const ValueSet &inputs, - const ValueSet &outputs, BasicBlock *header, - const ValueSet &SinkingCands, StructType *StructArgTy, - ArrayRef SwichCases); +/// Generates the code for the extracted function. That is: a prolog, the moved or copied code from the original function, and epilogs for each exit. + void emitFunctionBody(const ValueSet &inputs, const ValueSet &outputs, + Function *newFunction, StructType *StructArgTy, + ArrayRef SwitchCases, BasicBlock *header, + const ValueSet &SinkingCands); /// Generates a Basic Block that calls the extracted function. - CallInst *emitReplacerCall(Function *oldFunction, BasicBlock *header, - BasicBlock *ReplIP, Function *newFunction, - const ValueSet &inputs, const ValueSet &outputs, + CallInst *emitReplacerCall( const ValueSet &inputs, const ValueSet &outputs, + Function *newFunction, StructType *StructArgTy, + ArrayRef SwitchCases, + Function *oldFunction , + BasicBlock *ReplIP, BlockFrequency EntryFreq, - StructType *StructArgTy, - ArrayRef SwichCases, const SetVector &LifetimesStart, std::vector &Reloads); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index f727ea51a9939..c40730c5a8f56 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -823,7 +823,7 @@ void CodeExtractor::splitReturnBlocks() { /// constructFunction - make a function based on inputs and outputs, as follows: /// f(in0, ..., inN, out0, ..., outN) Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, - const 
ValueSet &outputs,const Twine &Name) { + const ValueSet &outputs, BlockFrequency EntryFreq,const Twine &Name) { LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); @@ -1012,6 +1012,14 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, AI->setName(outputs[i]->getName()+".out"); } + // Update the entry count of the function. + if (BFI) { + auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (Count.hasValue()) + newFunction->setEntryCount( + ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME + } + return newFunction; } @@ -1031,52 +1039,54 @@ static void applyFirstDebugLoc(Function *oldFunction, } } -void CodeExtractor::emitFunction(Function *newFunction, const ValueSet &inputs, - const ValueSet &outputs, BasicBlock *header, - const ValueSet &SinkingCands, - StructType *StructArgTy, - ArrayRef SwichCases) { +void CodeExtractor::emitFunctionBody(const ValueSet &inputs, const ValueSet &outputs, + Function *newFunction, StructType *StructArgTy, + ArrayRef SwitchCases, + BasicBlock *header, + const ValueSet &SinkingCands) { Function *oldFunction = header->getParent(); LLVMContext &Context = oldFunction->getContext(); // The new function needs a root node because other nodes can branch to the // head of the region, but the entry node of a function cannot have preds. BasicBlock *newFuncRoot = - BasicBlock::Create(header->getContext(), "newFuncRoot", newFunction); + BasicBlock::Create(Context, "newFuncRoot", newFunction); + // The map of values from the original function to the corresponding values in the extracted function; only used with KeepOldBlocks. ValueToValueMapTy VMap; + // Additional instructions not in a extracted block whose operands need to be remapped. 
SmallVector AdditionalRemap; - auto MoveOrCopyInst = [this](Instruction *I, BasicBlock *IB, - BasicBlock::iterator IP) -> Instruction * { + + + // Copy or move (depending on KeepOldBlocks) an instruction to the new function. + auto MoveOrCopyInst = [this,newFuncRoot](Instruction *I) -> Instruction * { + BasicBlock::iterator IP = newFuncRoot->getFirstInsertionPt(); if (KeepOldBlocks) { - auto AI = I->clone(); - AI->setName(I->getName()); - IB->getInstList().insert(IP, AI); - return AI; + Instruction* CloneI = I->clone(); + CloneI->setName(I->getName()); + newFuncRoot->getInstList().insert(IP, CloneI); + return CloneI; } - I->moveBefore(*IB, IP); + I->moveBefore(*newFuncRoot, IP); return I; }; // Now sink all instructions which only have non-phi uses inside the region. // Group the allocas at the start of the block, so that any bitcast uses of // the allocas are well-defined. - - for (auto *II : SinkingCands) { + for (Value *II : SinkingCands) { if (!isa(II)) { - auto New = MoveOrCopyInst(cast(II), newFuncRoot, - newFuncRoot->getFirstInsertionPt()); + Instruction* New = MoveOrCopyInst(cast(II)); if (KeepOldBlocks) { AdditionalRemap.push_back(New); VMap[II] = New; } } } - for (auto *II : SinkingCands) { + for (Value *II : SinkingCands) { if (auto *AI = dyn_cast(II)) { - AI = cast( - MoveOrCopyInst(AI, newFuncRoot, newFuncRoot->getFirstInsertionPt())); + AI = cast( MoveOrCopyInst(AI)); if (KeepOldBlocks) { AdditionalRemap.push_back(AI); VMap[II] = AI; @@ -1094,8 +1104,8 @@ void CodeExtractor::emitFunction(Function *newFunction, const ValueSet &inputs, Value *RewriteVal; if (AggregateArgs) { Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); Instruction *TI = newFunction->begin()->getTerminator(); GetElementPtrInst *GEP = 
GetElementPtrInst::Create( StructArgTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); @@ -1107,14 +1117,14 @@ void CodeExtractor::emitFunction(Function *newFunction, const ValueSet &inputs, NewValues.push_back(RewriteVal); } - for (auto &&P : enumerate(inputs)) { - VMap[P.value()] = NewValues[P.index()]; - } - //// Copy/Move code - /////////////////////////////////////////////////////////////////////////////// + if (KeepOldBlocks) { + for (auto &&P : enumerate(inputs)) + VMap[P.value()] = NewValues[P.index()]; + + // Clone the blocks and instructions code region. for (BasicBlock *Block : Blocks) { BasicBlock *CBB = CloneBasicBlock( Block, VMap, {}, newFunction, /* CodeInfo */ nullptr, @@ -1135,14 +1145,14 @@ void CodeExtractor::emitFunction(Function *newFunction, const ValueSet &inputs, VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); } - // Note return instructions for the caller. - // if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) - // Returns.push_back(RI); + // Non-header block may have branches from outside the region. These continue to branch to the original blocks, hence remove their PHI entries. 
+ if (Block != header) for (auto &&P : CBB->phis()) { - auto NumIncoming = P.getNumIncomingValues(); + unsigned NumIncoming = P.getNumIncomingValues(); for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { - if (Blocks.count(P.getIncomingBlock(Idx))) + BasicBlock *IncomingBlock = P.getIncomingBlock(Idx); + if (Blocks.count(IncomingBlock) ) continue; P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/false); } @@ -1158,7 +1168,6 @@ void CodeExtractor::emitFunction(Function *newFunction, const ValueSet &inputs, } else { moveCodeToFunction(newFunction); - if (!KeepOldBlocks) { for (unsigned i = 0, e = inputs.size(); i != e; ++i) { Value *RewriteVal = NewValues[i]; @@ -1169,11 +1178,10 @@ void CodeExtractor::emitFunction(Function *newFunction, const ValueSet &inputs, if (Blocks.count(inst->getParent())) inst->replaceUsesOfWith(inputs[i], RewriteVal); } - } } std::map ExitBlockMap; - for (auto &&P : enumerate(SwichCases)) { + for (auto &&P : enumerate(SwitchCases)) { auto OldTarget = P.value(); auto SuccNum = P.index(); @@ -1392,12 +1400,10 @@ static void insertLifetimeMarkersSurroundingCall( } CallInst *CodeExtractor::emitReplacerCall( - Function *oldFunction, - BasicBlock *header // NewHeader - , - BasicBlock *ReplIP, Function *newFunction, const ValueSet &inputs, - const ValueSet &outputs, BlockFrequency EntryFreq, StructType *StructArgTy, - ArrayRef SwichCases, const SetVector &LifetimesStart, + const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, + StructType *StructArgTy, ArrayRef SwtichCases , Function *oldFunction , BasicBlock *ReplIP, + BlockFrequency EntryFreq, + const SetVector &LifetimesStart, std::vector &Reloads) { LLVMContext &Context = oldFunction->getContext(); Module *M = oldFunction->getParent(); @@ -1405,15 +1411,11 @@ CallInst *CodeExtractor::emitReplacerCall( // This takes place of the original loop BasicBlock *codeReplacer = - BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, ReplIP); + BasicBlock::Create(Context, 
"codeRepl", oldFunction, ReplIP); BasicBlock *AllocaBlock = &oldFunction->front(); // Update the entry count of the function. if (BFI) { - auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (Count.hasValue()) - newFunction->setEntryCount( - ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); } @@ -1509,7 +1511,7 @@ CallInst *CodeExtractor::emitReplacerCall( SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), codeReplacer, 0, codeReplacer); - for (auto &&P : enumerate(SwichCases)) { + for (auto &&P : enumerate(SwtichCases)) { auto OldTarget = P.value(); auto SuccNum = P.index(); // ExitBlockSwitchIdx[OldTarget]; @@ -2017,7 +2019,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Suffix.empty() ? (header->getName().empty() ? "extracted" : header->getName().str()) : Suffix; - Function *newFunction = constructFunctionDeclaration(inputs, outputs, oldFunction->getName() + "." + SuffixToUse ); + Function *newFunction = constructFunctionDeclaration(inputs, outputs, EntryFreq, oldFunction->getName() + "." 
+ SuffixToUse ); @@ -2029,15 +2031,15 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, //// CodeGen newFunction implementation ////////////////////////////////////////////////////// - emitFunction(newFunction, inputs, outputs, header, SinkingCands, StructArgTy, - SwichCases); + emitFunctionBody(inputs, outputs, newFunction, StructArgTy, SwichCases, header, + SinkingCands); //// Codegen newFunction call replacement ///////////////////////////////////////////////// std::vector Reloads; - CallInst *call = emitReplacerCall(oldFunction, header, ReplIP, newFunction, - inputs, outputs, EntryFreq, StructArgTy, - SwichCases, LifetimesStart, Reloads); + CallInst *call = emitReplacerCall( inputs, outputs, + newFunction, StructArgTy, SwichCases, oldFunction, ReplIP, + EntryFreq, LifetimesStart, Reloads); BasicBlock *codeReplacer = call->getParent(); //// Connect call replacement to CFG From 6b59af7f446ef9e499da3147c212f510e4e364d2 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 20:15:34 -0600 Subject: [PATCH 088/130] cleaning up --- .../llvm/Transforms/Utils/CodeExtractor.h | 1 + llvm/lib/Transforms/Utils/CodeExtractor.cpp | 120 ++++++++++-------- 2 files changed, 65 insertions(+), 56 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 184caa66f2f23..965830dec13b5 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -252,6 +252,7 @@ class CodeExtractorAnalysisCache { void moveCodeToFunction(Function *newFunction); + void calculateNewCallTerminatorWeights( BasicBlock *CodeReplacer, const DenseMap &ExitWeights, diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index c40730c5a8f56..74f18f088719d 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1121,49 +1121,45 @@ void 
CodeExtractor::emitFunctionBody(const ValueSet &inputs, const ValueSet &out if (KeepOldBlocks) { - for (auto &&P : enumerate(inputs)) - VMap[P.value()] = NewValues[P.index()]; - - // Clone the blocks and instructions code region. - for (BasicBlock *Block : Blocks) { - BasicBlock *CBB = CloneBasicBlock( - Block, VMap, {}, newFunction, /* CodeInfo */ nullptr, - /* DIFinder */ nullptr, - [](const Instruction *I) -> bool { return !isa(I); }); - - // Add basic block mapping. - VMap[Block] = CBB; - - // It is only legal to clone a function if a block address within that - // function is never referenced outside of the function. Given that, we - // want to map block addresses from the old function to block addresses in - // the clone. (This is different from the generic ValueMapper - // implementation, which generates an invalid blockaddress when - // cloning a function.) - if (Block->hasAddressTaken()) { - Constant *OldBBAddr = BlockAddress::get(oldFunction, Block); - VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); + // Copy blocks and instrutions to newFunction. + for (BasicBlock *Block : Blocks) { + BasicBlock *CBB = CloneBasicBlock( + Block, VMap, {}, newFunction, /* CodeInfo */ nullptr, + /* DIFinder */ nullptr, + [](const Instruction *I) -> bool { return !isa(I); }); + + // Add basic block mapping. + VMap[Block] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (Block->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(oldFunction, Block); + VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); + } + + + // Non-header block may have branches from outside the region. 
These continue to branch to the original blocks, hence remove their PHI entries. + if (Block != header) + for (auto &&P : CBB->phis()) { + unsigned NumIncoming = P.getNumIncomingValues(); + for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { + BasicBlock *IncomingBlock = P.getIncomingBlock(Idx); + if (Blocks.count(IncomingBlock) ) + continue; + P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/false); + } + } } - // Non-header block may have branches from outside the region. These continue to branch to the original blocks, hence remove their PHI entries. - if (Block != header) - for (auto &&P : CBB->phis()) { - unsigned NumIncoming = P.getNumIncomingValues(); - for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { - BasicBlock *IncomingBlock = P.getIncomingBlock(Idx); - if (Blocks.count(IncomingBlock) ) - continue; - P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/false); - } - } - } +for (auto P : enumerate(inputs)) +VMap[P.value()] = NewValues[P.index()]; - for (auto Pred : predecessors(header)) { - if (VMap.count(Pred)) - continue; - VMap[Pred] = newFuncRoot; - } } else { moveCodeToFunction(newFunction); @@ -1180,24 +1176,26 @@ void CodeExtractor::emitFunctionBody(const ValueSet &inputs, const ValueSet &out } } + // Create stubs for the original exit blocks. std::map ExitBlockMap; - for (auto &&P : enumerate(SwitchCases)) { - auto OldTarget = P.value(); - auto SuccNum = P.index(); + for (auto P : enumerate(SwitchCases)) { + BasicBlock* OldTarget = P.value(); + size_t SuccNum = P.index(); BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - // if (NewTarget) - // continue; + assert(!NewTarget && "Switch cases muast be unique"); + // If we don't already have an exit stub for this non-extracted // destination, create one now! 
NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", newFunction); + if (KeepOldBlocks) VMap[OldTarget] = NewTarget; - // auto SuccNum = ExitBlockSwitchIdx[OldTarget]; - auto &Context = Blocks.front()->getContext(); + + Value *brVal = nullptr; assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); switch (NumExitBlocks) { @@ -1215,6 +1213,7 @@ void CodeExtractor::emitFunctionBody(const ValueSet &inputs, const ValueSet &out ReturnInst::Create(Context, brVal, NewTarget); } + for (BasicBlock *Block : Blocks) { Instruction *TI = Block->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { @@ -1234,7 +1233,15 @@ void CodeExtractor::emitFunctionBody(const ValueSet &inputs, const ValueSet &out } } + + // Update values references to point to the new function. if (KeepOldBlocks) { + for (BasicBlock* Pred : predecessors(header)) { + if (VMap.count(Pred)) + continue; + VMap[Pred] = newFuncRoot; + } + for (Instruction *II : AdditionalRemap) RemapInstruction(II, VMap, RF_NoModuleLevelChanges); @@ -1242,14 +1249,12 @@ void CodeExtractor::emitFunctionBody(const ValueSet &inputs, const ValueSet &out // references as we go. This uses VMap to do all the hard work. for (BasicBlock *Block : Blocks) { WeakTrackingVH NewBlock = VMap.lookup(Block); - if (!NewBlock) { + if (!NewBlock) continue; - } - BasicBlock &Y = cast(*NewBlock); - // Loop over all instructions, fixing each one as we find it... - for (Instruction &II : Y) + // Loop over all instructions, fixing each one as we find it... + for (Instruction &II : cast(*NewBlock)) RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); } } else { @@ -1263,11 +1268,11 @@ void CodeExtractor::emitFunctionBody(const ValueSet &inputs, const ValueSet &out } } - auto NewHeader = header; - if (KeepOldBlocks) - NewHeader = cast(VMap.lookup(NewHeader)); - assert(NewHeader); - auto *BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); + BasicBlock * NewHeader = KeepOldBlocks ? 
cast(VMap.lookup(NewHeader)) : header; + assert(NewHeader && "Header must have been cloned/moved to newFunction"); + + // Connect newFunction entry block to new header. + BranchInst *BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); // Store the arguments right after the definition of output value. @@ -1677,6 +1682,9 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { } } + + + void CodeExtractor::calculateNewCallTerminatorWeights( BasicBlock *CodeReplacer, const DenseMap &ExitWeights, From 6aa65a302e717f43c31f8ab596b962b66cbf512c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 20:47:42 -0600 Subject: [PATCH 089/130] cleaning up --- llvm/include/llvm/Transforms/Utils/Cloning.h | 6 +- .../llvm/Transforms/Utils/CodeExtractor.h | 71 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1339 ++++++++--------- llvm/tools/llvm-extract/llvm-extract.cpp | 14 +- 4 files changed, 713 insertions(+), 717 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index e46de638466e1..cff5e6bc8daea 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -114,8 +114,10 @@ struct ClonedCodeInfo { /// If you would like to collect additional information about the cloned /// function, you can specify a ClonedCodeInfo object with the optional fifth /// parameter. -/// -/// If you would like to clone only a subset of instructions in the basic block, you can specify a callback returning true only for those instructions that are to be cloned. +/// +/// If you would like to clone only a subset of instructions in the basic block, +/// you can specify a callback returning true only for those instructions that +/// are to be cloned. 
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix = "", Function *F = nullptr, diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 965830dec13b5..c92cc70d623b4 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -96,26 +96,29 @@ class CodeExtractorAnalysisCache { // If true, varargs functions can be extracted. bool AllowVarArgs; -/// If true, copies the code into the extracted function instead of moving it. + /// If true, copies the code into the extracted function instead of moving + /// it. bool KeepOldBlocks; // Bits of intermediate state computed at various phases of extraction. SetVector Blocks; -/// Same as ExitBlocks.size(). + /// Same as ExitBlocks.size(). unsigned NumExitBlocks = std::numeric_limits::max(); Type *RetTy; - - - /// Lists of blocks that are branched from the code region to be extracted. ExitBlocks contains each block at most once. OldTargets is used to determine the extracted function's return code (so its order must be deterministic) but may contain the same block multiple times if branched to from multiple blocks. -/// @{ - SmallPtrSet ExitBlocks; + /// Lists of blocks that are branched from the code region to be extracted. + /// + /// ExitBlocks contains each block at most once. OldTargets is used to + /// determine the extracted function's return code (so its order must be + /// deterministic) but may contain the same block multiple times if branched + /// to from multiple blocks. + /// @{ SmallVector OldTargets; -/// @} - - + SmallPtrSet ExitBlocks; + SmallVector SwitchCases; + /// @} // Suffix to use when creating extracted function (appended to the original // function name + "."). If empty, the default is to use the entry block @@ -133,8 +136,11 @@ class CodeExtractorAnalysisCache { /// code is extracted, including vastart. 
If AllowAlloca is true, then /// extraction of blocks containing alloca instructions would be possible, /// however code extractor won't validate whether extraction is legal. -/// - /// If KeepOldBlocks is true, the original instances of the extracted region remains in the original function so they can still be branched to from non-extracted blocks. However, only branches to the first block will call the extracted function. + /// + /// If KeepOldBlocks is true, the original instances of the extracted region + /// remains in the original function so they can still be branched to from + /// non-extracted blocks. However, only branches to the first block will + /// call the extracted function. CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr, @@ -241,48 +247,51 @@ class CodeExtractorAnalysisCache { getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, Instruction *Addr, BasicBlock *ExitBlock) const; -/// Updates the list of exit blocks (OldTargets and ExitBlocks) after changes of the control flow or the Blocks list. + /// Updates the list of exit blocks (OldTargets and ExitBlocks) after + /// changes of the control flow or the Blocks list. void recomputeExitBlocks(); void severSplitPHINodesOfEntry(BasicBlock *&Header); void severSplitPHINodesOfExits(); void splitReturnBlocks(); - - void moveCodeToFunction(Function *newFunction); - void calculateNewCallTerminatorWeights( BasicBlock *CodeReplacer, const DenseMap &ExitWeights, BranchProbabilityInfo *BPI); - /// Normalizes the control flow of the extracted regions, such as ensuring that the extracted region does not contain a return instruction. - void normalizeCFGForExtraction(BasicBlock *&Header, - bool NoExitBlockPHIs); + /// Normalizes the control flow of the extracted regions, such as ensuring + /// that the extracted region does not contain a return instruction. 
+ void normalizeCFGForExtraction(BasicBlock *&Header, bool NoExitBlockPHIs); -/// Generates the function declaration for the function containing the extracted code. + /// Generates the function declaration for the function containing the + /// extracted code. Function *constructFunctionDeclaration(const ValueSet &inputs, - const ValueSet &outputs, BlockFrequency EntryFreq, const Twine &Name); + const ValueSet &outputs, + BlockFrequency EntryFreq, + const Twine &Name); -/// Generates the code for the extracted function. That is: a prolog, the moved or copied code from the original function, and epilogs for each exit. + /// Generates the code for the extracted function. That is: a prolog, the + /// moved or copied code from the original function, and epilogs for each + /// exit. void emitFunctionBody(const ValueSet &inputs, const ValueSet &outputs, - Function *newFunction, StructType *StructArgTy, - ArrayRef SwitchCases, BasicBlock *header, - const ValueSet &SinkingCands); + Function *newFunction, StructType *StructArgTy, + ArrayRef SwitchCases, + BasicBlock *header, const ValueSet &SinkingCands); -/// Generates a Basic Block that calls the extracted function. - CallInst *emitReplacerCall( const ValueSet &inputs, const ValueSet &outputs, + /// Generates a Basic Block that calls the extracted function. + CallInst *emitReplacerCall(const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, StructType *StructArgTy, ArrayRef SwitchCases, - Function *oldFunction , - BasicBlock *ReplIP, + Function *oldFunction, BasicBlock *ReplIP, BlockFrequency EntryFreq, - const SetVector &LifetimesStart, + ArrayRef LifetimesStart, std::vector &Reloads); -/// Connects the basic block containing the call to the extracted function into the original function's control flow. + /// Connects the basic block containing the call to the extracted function + /// into the original function's control flow. 
void insertReplacerCall( Function *oldFunction, BasicBlock *header, BasicBlock *codeReplacer, const ValueSet &outputs, ArrayRef Reloads, diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 74f18f088719d..bf9f1f4c6350f 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -231,8 +231,9 @@ buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, continue; } - // All blocks other than the first must not have predecessors outside of - // the subgraph which is being extracted. KeepOldBlocks relaxes this requirement. + // All blocks other than the first must not have predecessors outside of + // the subgraph which is being extracted. KeepOldBlocks relaxes this + // requirement. if (!KeepOldBlocks) { for (auto *PBB : predecessors(BB)) if (!Result.count(PBB)) { @@ -823,20 +824,28 @@ void CodeExtractor::splitReturnBlocks() { /// constructFunction - make a function based on inputs and outputs, as follows: /// f(in0, ..., inN, out0, ..., outN) Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, - const ValueSet &outputs, BlockFrequency EntryFreq,const Twine &Name) { + const ValueSet &outputs, + BlockFrequency EntryFreq, + const Twine &Name) { LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); - Function *oldFunction = Blocks.front() ->getParent(); - LLVMContext &Context = oldFunction ->getContext(); - Module *M = Blocks.front() ->getModule(); + Function *oldFunction = Blocks.front()->getParent(); + LLVMContext &Context = oldFunction->getContext(); + Module *M = Blocks.front()->getModule(); // This function returns unsigned, outputs will go back by reference. 
switch (NumExitBlocks) { case 0: - case 1: RetTy = Type::getVoidTy(Context); break; - case 2: RetTy = Type::getInt1Ty(Context); break; - default: RetTy = Type::getInt16Ty(Context); break; + case 1: + RetTy = Type::getVoidTy(Context); + break; + case 2: + RetTy = Type::getInt1Ty(Context); + break; + default: + RetTy = Type::getInt16Ty(Context); + break; } std::vector paramTy; @@ -873,11 +882,10 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, FunctionType::get(RetTy, paramTy, AllowVarArgs && oldFunction->isVarArg()); - // Create the new function - Function *newFunction = Function::Create( - funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), - Name, M); + Function *newFunction = + Function::Create(funcType, GlobalValue::InternalLinkage, + oldFunction->getAddressSpace(), Name, M); // If the old function is no-throw, so is the new one. if (oldFunction->doesNotThrow()) newFunction->setDoesNotThrow(); @@ -996,15 +1004,14 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, newFunction->addFnAttr(Attr); } - // Set parameter attributes. + // Set parameter attributes. if (!AggregateArgs) { - // Set swifterror parameter attributes. - for (auto P : enumerate(inputs)) + // Set swifterror parameter attributes. + for (auto P : enumerate(inputs)) if (P.value()->isSwiftError()) newFunction->addParamAttr(P.index(), Attribute::SwiftError); - - // Set names for input and output arguments. + // Set names for input and output arguments. Function::arg_iterator AI = newFunction->arg_begin(); for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) AI->setName(inputs[i]->getName()); @@ -1014,16 +1021,15 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, // Update the entry count of the function. 
if (BFI) { - auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (Count.hasValue()) - newFunction->setEntryCount( - ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME + auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (Count.hasValue()) + newFunction->setEntryCount( + ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME } return newFunction; } - static void applyFirstDebugLoc(Function *oldFunction, ArrayRef Blocks, Instruction *BranchI) { @@ -1039,290 +1045,6 @@ static void applyFirstDebugLoc(Function *oldFunction, } } -void CodeExtractor::emitFunctionBody(const ValueSet &inputs, const ValueSet &outputs, - Function *newFunction, StructType *StructArgTy, - ArrayRef SwitchCases, - BasicBlock *header, - const ValueSet &SinkingCands) { - Function *oldFunction = header->getParent(); - LLVMContext &Context = oldFunction->getContext(); - - // The new function needs a root node because other nodes can branch to the - // head of the region, but the entry node of a function cannot have preds. - BasicBlock *newFuncRoot = - BasicBlock::Create(Context, "newFuncRoot", newFunction); - - // The map of values from the original function to the corresponding values in the extracted function; only used with KeepOldBlocks. - ValueToValueMapTy VMap; - - // Additional instructions not in a extracted block whose operands need to be remapped. - SmallVector AdditionalRemap; - - - // Copy or move (depending on KeepOldBlocks) an instruction to the new function. - auto MoveOrCopyInst = [this,newFuncRoot](Instruction *I) -> Instruction * { - BasicBlock::iterator IP = newFuncRoot->getFirstInsertionPt(); - if (KeepOldBlocks) { - Instruction* CloneI = I->clone(); - CloneI->setName(I->getName()); - newFuncRoot->getInstList().insert(IP, CloneI); - return CloneI; - } - I->moveBefore(*newFuncRoot, IP); - return I; - }; - - // Now sink all instructions which only have non-phi uses inside the region. 
- // Group the allocas at the start of the block, so that any bitcast uses of - // the allocas are well-defined. - for (Value *II : SinkingCands) { - if (!isa(II)) { - Instruction* New = MoveOrCopyInst(cast(II)); - if (KeepOldBlocks) { - AdditionalRemap.push_back(New); - VMap[II] = New; - } - } - } - for (Value *II : SinkingCands) { - if (auto *AI = dyn_cast(II)) { - AI = cast( MoveOrCopyInst(AI)); - if (KeepOldBlocks) { - AdditionalRemap.push_back(AI); - VMap[II] = AI; - } - } - } - - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); - - // Rewrite all users of the inputs in the extracted region to use the - // arguments (or appropriate addressing into struct) instead. - SmallVector NewValues; - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - Instruction *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructArgTy->getElementType(i), GEP, - "loadgep_" + inputs[i]->getName(), TI); - } else - RewriteVal = &*AI++; - - NewValues.push_back(RewriteVal); - } - - - - - if (KeepOldBlocks) { - // Copy blocks and instrutions to newFunction. - for (BasicBlock *Block : Blocks) { - BasicBlock *CBB = CloneBasicBlock( - Block, VMap, {}, newFunction, /* CodeInfo */ nullptr, - /* DIFinder */ nullptr, - [](const Instruction *I) -> bool { return !isa(I); }); - - // Add basic block mapping. - VMap[Block] = CBB; - - // It is only legal to clone a function if a block address within that - // function is never referenced outside of the function. Given that, we - // want to map block addresses from the old function to block addresses in - // the clone. 
(This is different from the generic ValueMapper - // implementation, which generates an invalid blockaddress when - // cloning a function.) - if (Block->hasAddressTaken()) { - Constant *OldBBAddr = BlockAddress::get(oldFunction, Block); - VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); - } - - - // Non-header block may have branches from outside the region. These continue to branch to the original blocks, hence remove their PHI entries. - if (Block != header) - for (auto &&P : CBB->phis()) { - unsigned NumIncoming = P.getNumIncomingValues(); - for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { - BasicBlock *IncomingBlock = P.getIncomingBlock(Idx); - if (Blocks.count(IncomingBlock) ) - continue; - P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/false); - } - } - } - - -for (auto P : enumerate(inputs)) -VMap[P.value()] = NewValues[P.index()]; - - - } else { - moveCodeToFunction(newFunction); - - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal = NewValues[i]; - - std::vector Users(inputs[i]->user_begin(), - inputs[i]->user_end()); - for (User *use : Users) - if (Instruction *inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } - } - - // Create stubs for the original exit blocks. - std::map ExitBlockMap; - for (auto P : enumerate(SwitchCases)) { - BasicBlock* OldTarget = P.value(); - size_t SuccNum = P.index(); - - BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - assert(!NewTarget && "Switch cases muast be unique"); - - - // If we don't already have an exit stub for this non-extracted - // destination, create one now! - NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", - newFunction); - if (KeepOldBlocks) - VMap[OldTarget] = NewTarget; - - - - - Value *brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: - break; // No value needed. 
- case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); - break; - } - - ReturnInst::Create(Context, brVal, NewTarget); - } - - - for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock *OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock *NewTarget = ExitBlockMap[OldTarget]; - assert(NewTarget && "Unknown target block!"); - - if (!KeepOldBlocks) { - // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); - } else { - VMap[OldTarget] = NewTarget; - } - } - } - - - // Update values references to point to the new function. - if (KeepOldBlocks) { - for (BasicBlock* Pred : predecessors(header)) { - if (VMap.count(Pred)) - continue; - VMap[Pred] = newFuncRoot; - } - - for (Instruction *II : AdditionalRemap) - RemapInstruction(II, VMap, RF_NoModuleLevelChanges); - - // Loop over all of the instructions in the new function, fixing up operand - // references as we go. This uses VMap to do all the hard work. - for (BasicBlock *Block : Blocks) { - WeakTrackingVH NewBlock = VMap.lookup(Block); - if (!NewBlock) - continue; - - - // Loop over all instructions, fixing each one as we find it... - for (Instruction &II : cast(*NewBlock)) - RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); - } - } else { - // Loop over all of the PHI nodes in the header and exit blocks, and change - // any references to the old incoming edge to be the new incoming edge. 
- for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { - PHINode *PN = cast(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!Blocks.count(PN->getIncomingBlock(i))) - PN->setIncomingBlock(i, newFuncRoot); - } - } - - BasicBlock * NewHeader = KeepOldBlocks ? cast(VMap.lookup(NewHeader)) : header; - assert(NewHeader && "Header must have been cloned/moved to newFunction"); - - // Connect newFunction entry block to new header. - BranchInst *BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); - - // Store the arguments right after the definition of output value. - // This should be proceeded after creating exit stubs to be ensure that invoke - // result restore will be placed in the outlined function. - Function::arg_iterator OAI = newFunction->arg_begin() + inputs.size(); - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto *OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; - - if (KeepOldBlocks) - OutI = cast(VMap.lookup(OutI)); - - // Find proper insertion point. - BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. Otherwise we insert the store right after OutI. 
- if (auto *InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); - - Instruction *InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || - Blocks.count(InsertBefore->getParent())) && - "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), - InsertBefore); - new StoreInst(OutI, GEP, InsertBefore); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. - } else { - new StoreInst(OutI, &*OAI, InsertBefore); - ++OAI; - } - } -} - /// Erase lifetime.start markers which reference inputs to the extraction /// region, and insert the referenced memory into \p LifetimesStart. 
/// @@ -1404,286 +1126,24 @@ static void insertLifetimeMarkersSurroundingCall( } } -CallInst *CodeExtractor::emitReplacerCall( - const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, - StructType *StructArgTy, ArrayRef SwtichCases , Function *oldFunction , BasicBlock *ReplIP, - BlockFrequency EntryFreq, - const SetVector &LifetimesStart, - std::vector &Reloads) { - LLVMContext &Context = oldFunction->getContext(); - Module *M = oldFunction->getParent(); - const DataLayout &DL = M->getDataLayout(); +void CodeExtractor::moveCodeToFunction(Function *newFunction) { + Function *oldFunc = (*Blocks.begin())->getParent(); + Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); + Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); - // This takes place of the original loop - BasicBlock *codeReplacer = - BasicBlock::Create(Context, "codeRepl", oldFunction, ReplIP); - BasicBlock *AllocaBlock = &oldFunction->front(); + auto newFuncIt = newFunction->front().getIterator(); + for (BasicBlock *Block : Blocks) { + // Delete the basic block from the old function, and the list of blocks + oldBlocks.remove(Block); - // Update the entry count of the function. - if (BFI) { - BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); + // Insert this basic block into the new function + // Insert the original blocks after the entry block created + // for the new function. The entry block may be followed + // by a set of exit blocks at this point, but these exit + // blocks better be placed at the end of the new function. 
+ newFuncIt = newBlocks.insertAfter(newFuncIt, Block); } - - // Add inputs as params, or to be filled into the struct - unsigned ArgNo = 0; - std::vector params; - - AllocaInst *Struct = nullptr; - if (AggregateArgs && StructArgTy) { - std::vector StructValues; - for (Value *input : inputs) { - StructValues.push_back(input); - ++ArgNo; - } - - Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", &AllocaBlock->front()); - - params.push_back(Struct); - - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); - } - } - - std::vector ReloadOutputs; - - if (!AggregateArgs) { - for (Value *input : inputs) { - params.push_back(input); - } - - // Create allocas for the outputs - for (Value *output : outputs) { - AllocaInst *alloca = - new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), nullptr, - output->getName() + ".loc", &AllocaBlock->front()); - ReloadOutputs.push_back(alloca); - params.push_back(alloca); - } - } - - // Emit the call to the function - CallInst *call = - CallInst::Create(newFunction, params, - NumExitBlocks > 1 ? "targetBlock" : "", codeReplacer); - - // Set swifterror parameter attributes. - if (!AggregateArgs) { - for (auto &&P : enumerate(inputs)) { - if (P.value()->isSwiftError()) - call->addParamAttr(P.index(), Attribute::SwiftError); - } - } - - // Add debug location to the new call, if the original function has debug - // info. In that case, the terminator of the entry block of the extracted - // function contains the first debug location of the extracted function, - // set in extractCodeRegion. 
- if (oldFunction->getSubprogram()) { - if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) - call->setDebugLoc(DL); - } - - // Reload the outputs passed in by reference. - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = nullptr; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } else { - Output = ReloadOutputs[i]; - } - LoadInst *load = - new LoadInst(outputs[i]->getType(), Output, - outputs[i]->getName() + ".reload", codeReplacer); - Reloads.push_back(load); - } - - // Now we can emit a switch statement using the call as a value. - SwitchInst *TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); - - for (auto &&P : enumerate(SwtichCases)) { - auto OldTarget = P.value(); - auto SuccNum = P.index(); // ExitBlockSwitchIdx[OldTarget]; - - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), SuccNum), - OldTarget); - } - - // Now that we've done the deed, simplify the switch instruction. 
- Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); - switch (NumExitBlocks) { - case 0: - // There are no successors (the block containing the switch itself), which - // means that previously this was the last part of the function, and hence - // this should be rewritten as a `ret' - - // Check if the function should return a value - if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, nullptr, TheSwitch); // Return void - } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { - // return what we have - ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); - } else { - // Otherwise we must have code extracted an unwind or something, just - // return whatever we want. - ReturnInst::Create(Context, Constant::getNullValue(OldFnRetTy), - TheSwitch); - } - - TheSwitch->eraseFromParent(); - break; - case 1: - // Only a single destination, change the switch into an unconditional - // branch. - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); - TheSwitch->eraseFromParent(); - break; - case 2: - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), - call, TheSwitch); - TheSwitch->eraseFromParent(); - break; - default: - // Otherwise, make the default destination of the switch instruction be one - // of the other successors. - TheSwitch->setCondition(call); - TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); - // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks - 1)); - break; - } - - // Insert lifetime markers around the reloads of any output values. The - // allocas output values are stored in are only in-use in the codeRepl block. - insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); - - // Replicate the effects of any lifetime start/end markers which referenced - // input objects in the extraction region by placing markers around the call. 
- insertLifetimeMarkersSurroundingCall(M, LifetimesStart.getArrayRef(), {}, - call); - - return call; -} - -void CodeExtractor::insertReplacerCall( - Function *oldFunction, BasicBlock *header, BasicBlock *codeReplacer, - const ValueSet &outputs, ArrayRef Reloads, - const DenseMap &ExitWeights) { - // Rewrite branches to basic blocks outside of the loop to new dummy blocks - // within the new function. This must be done before we lose track of which - // blocks were originally in the code region. - std::vector Users(header->user_begin(), header->user_end()); - for (auto &U : Users) // FIXME: KeepOldBlocks? - // The BasicBlock which contains the branch is not in - // the region modify the branch target to a new block - if (Instruction *I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction) - I->replaceUsesOfWith(header, codeReplacer); - - if (KeepOldBlocks) { - // Must be done after remap - SSAUpdater SSA; - for (auto P : enumerate(outputs)) { - auto OutIdx = P.index(); - auto OldVal = cast(P.value()); - auto NewVal = Reloads[OutIdx]; - - SSA.Initialize(OldVal->getType(), - (OldVal->getName() + ".merge_with_extracted").str()); - SSA.AddAvailableValue(codeReplacer, NewVal); - - // Could help SSAUpdater by determining in advance which output values are - // available in which exit blocks (from DT). 
- SSA.AddAvailableValue(OldVal->getParent(), OldVal); - - for (auto &&U : make_early_inc_range(OldVal->uses())) { - auto User = dyn_cast(U.getUser()); - if (!User) - continue; - auto EffectiveUser = User->getParent(); - if (auto &&P = dyn_cast(User)) { - EffectiveUser = P->getIncomingBlock(U); - } - - if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) - continue; - - SSA.RewriteUseAfterInsertions(U); - } - } - } else { - for (BasicBlock *ExitBB : ExitBlocks) - for (PHINode &PN : ExitBB->phis()) { - Value *IncomingCodeReplacerVal = nullptr; - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - // Ignore incoming values from outside of the extracted region. - if (!Blocks.count(PN.getIncomingBlock(i))) - continue; - - // Ensure that there is only one incoming value from codeReplacer. - if (!IncomingCodeReplacerVal) { - PN.setIncomingBlock(i, codeReplacer); - IncomingCodeReplacerVal = PN.getIncomingValue(i); - } else - assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && - "PHI has two incompatbile incoming values from codeRepl"); - } - } - - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto load = Reloads[i]; - - std::vector Users(outputs[i]->user_begin(), - outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast(Users[u]); - if (inst->getParent()->getParent() == oldFunction) - inst->replaceUsesOfWith(outputs[i], load); - } - } - } - - // Update the branch weights for the exit block. 
- if (BFI && NumExitBlocks > 1) - calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); -} - -void CodeExtractor::moveCodeToFunction(Function *newFunction) { - Function *oldFunc = (*Blocks.begin())->getParent(); - Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); - Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); - - auto newFuncIt = newFunction->front().getIterator(); - for (BasicBlock *Block : Blocks) { - // Delete the basic block from the old function, and the list of blocks - oldBlocks.remove(Block); - - // Insert this basic block into the new function - // Insert the original blocks after the entry block created - // for the new function. The entry block may be followed - // by a set of exit blocks at this point, but these exit - // blocks better be placed at the end of the new function. - newFuncIt = newBlocks.insertAfter(newFuncIt, Block); - } -} - - - +} void CodeExtractor::calculateNewCallTerminatorWeights( BasicBlock *CodeReplacer, @@ -1861,78 +1321,16 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { return extractCodeRegion(CEAC, Inputs, Outputs); } -void CodeExtractor::recomputeExitBlocks() { - OldTargets.clear(); - ExitBlocks.clear(); - - for (BasicBlock *Block : Blocks) { - for (BasicBlock *Succ : successors(Block)) { - if (Blocks.count(Succ)) - continue; - - ExitBlocks.insert(Succ); - OldTargets.push_back(Succ); - } - } - NumExitBlocks = ExitBlocks.size(); -} - -void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&Header, - bool NoExitBlockPHIs) { - // If we have any return instructions in the region, split those blocks so - // that the return is not in the region. - splitReturnBlocks(); - - // If we have to split PHI nodes of the entry or exit blocks, do so now. 
- severSplitPHINodesOfEntry(Header); - - // If a PHI in an exit block has multiple invoming values from the outlined - // region, create a new PHI for those values within the region such that only - // PHI itself becomes an output value, not each of its incoming values - // individually. - recomputeExitBlocks(); - severSplitPHINodesOfExits(); - - // If the option was given, ensure there are no PHI nodes at all in the exit - // nodes themselves. - if (NoExitBlockPHIs) { - for (BasicBlock *Block : Blocks) { - for (BasicBlock *Succ : make_early_inc_range(successors(Block))) { - if (Blocks.count(Succ)) - continue; - - if (!Succ->getSinglePredecessor()) - Succ = SplitEdge(Block, Succ, DT); - - // Ensure no PHI node in exit block (still possible with single - // predecessor, e.g. LCSSA) - while (auto *P = dyn_cast(&Succ->front())) { - assert(P->getNumIncomingValues() == 1); - P->replaceAllUsesWith(P->getIncomingValue(0)); - P->eraseFromParent(); - } - } - } - - // Exit nodes may have changed by SplitEdge. - // TODO: Preserve BPI/BFI for ExitBlocks (so should splitReturnBlocks()) - recomputeExitBlocks(); - } -} - -Function * -CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, - ValueSet &inputs, ValueSet &outputs) { - if (!isEligible()) - return nullptr; - - // Assumption: this is a single-entry code region, and the header is the first - // block in the region. - BasicBlock *header = *Blocks.begin(); - Function *oldFunction = header->getParent(); - - +Function * +CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + ValueSet &inputs, ValueSet &outputs) { + if (!isEligible()) + return nullptr; + // Assumption: this is a single-entry code region, and the header is the first + // block in the region. 
+ BasicBlock *header = *Blocks.begin(); + Function *oldFunction = header->getParent(); normalizeCFGForExtraction(header, KeepOldBlocks); @@ -1968,7 +1366,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); if (!HoistingCands.empty()) { - auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + BasicBlock *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); Instruction *TI = HoistToBlock->getTerminator(); for (auto *II : HoistingCands) cast(II)->moveBefore(TI); @@ -2005,32 +1403,21 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // to the new function. BasicBlock *ReplIP = header; if (!KeepOldBlocks) { - while (ReplIP && Blocks.count(ReplIP)) { + while (ReplIP && Blocks.count(ReplIP)) ReplIP = ReplIP->getNextNode(); - } } - SmallDenseMap ExitBlockSwitchIdx; - SmallVector SwichCases; - for (BasicBlock *OldTarget : OldTargets) { - if (Blocks.count(OldTarget)) - continue; - auto Added = - ExitBlockSwitchIdx.insert({OldTarget, ExitBlockSwitchIdx.size()}); - if (Added.second) - SwichCases.push_back(OldTarget); - } + + // Construct new function based on inputs/outputs & add allocas for all defs. std::string SuffixToUse = Suffix.empty() - ? (header->getName().empty() ? "extracted" : header->getName().str()) - : Suffix; - Function *newFunction = constructFunctionDeclaration(inputs, outputs, EntryFreq, oldFunction->getName() + "." + SuffixToUse ); - - - + ? (header->getName().empty() ? "extracted" : header->getName().str()) + : Suffix; + Function *newFunction = constructFunctionDeclaration( + inputs, outputs, EntryFreq, oldFunction->getName() + "." 
+ SuffixToUse); StructType *StructArgTy = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) @@ -2039,15 +1426,15 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, //// CodeGen newFunction implementation ////////////////////////////////////////////////////// - emitFunctionBody(inputs, outputs, newFunction, StructArgTy, SwichCases, header, - SinkingCands); + emitFunctionBody(inputs, outputs, newFunction, StructArgTy, SwitchCases, + header, SinkingCands); //// Codegen newFunction call replacement ///////////////////////////////////////////////// std::vector Reloads; - CallInst *call = emitReplacerCall( inputs, outputs, - newFunction, StructArgTy, SwichCases, oldFunction, ReplIP, - EntryFreq, LifetimesStart, Reloads); + CallInst *call = emitReplacerCall(inputs, outputs, newFunction, StructArgTy, + SwitchCases, oldFunction, ReplIP, EntryFreq, + LifetimesStart.getArrayRef(), Reloads); BasicBlock *codeReplacer = call->getParent(); //// Connect call replacement to CFG @@ -2079,6 +1466,600 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, return newFunction; } +void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&Header, + bool NoExitBlockPHIs) { + // If we have any return instructions in the region, split those blocks so + // that the return is not in the region. + splitReturnBlocks(); + + // If we have to split PHI nodes of the entry or exit blocks, do so now. + severSplitPHINodesOfEntry(Header); + + // If a PHI in an exit block has multiple invoming values from the outlined + // region, create a new PHI for those values within the region such that only + // PHI itself becomes an output value, not each of its incoming values + // individually. + recomputeExitBlocks(); + severSplitPHINodesOfExits(); + + // If the option was given, ensure there are no PHI nodes at all in the exit + // nodes themselves. 
+ if (NoExitBlockPHIs) { + for (BasicBlock *Block : Blocks) { + for (BasicBlock *Succ : make_early_inc_range(successors(Block))) { + if (Blocks.count(Succ)) + continue; + + if (!Succ->getSinglePredecessor()) + Succ = SplitEdge(Block, Succ, DT); + + // Ensure no PHI node in exit block (still possible with single + // predecessor, e.g. LCSSA) + while (auto *P = dyn_cast(&Succ->front())) { + assert(P->getNumIncomingValues() == 1); + P->replaceAllUsesWith(P->getIncomingValue(0)); + P->eraseFromParent(); + } + } + } + + // Exit nodes may have changed by SplitEdge. + // TODO: Preserve BPI/BFI for ExitBlocks (so should splitReturnBlocks()) + recomputeExitBlocks(); + } +} + +void CodeExtractor::recomputeExitBlocks() { + OldTargets.clear(); + ExitBlocks.clear(); + SwitchCases.clear(); + + + + for (BasicBlock *Block : Blocks) { + for (BasicBlock *Succ : successors(Block)) { + if (Blocks.count(Succ)) + continue; + + OldTargets.push_back(Succ); + bool IsNew = ExitBlocks.insert(Succ).second; + if (IsNew) + SwitchCases.push_back(Succ); + } + } + NumExitBlocks = ExitBlocks.size(); +} + + +void CodeExtractor::emitFunctionBody( + const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, + StructType *StructArgTy, ArrayRef SwitchCases, + BasicBlock *header, const ValueSet &SinkingCands) { + Function *oldFunction = header->getParent(); + LLVMContext &Context = oldFunction->getContext(); + + // The new function needs a root node because other nodes can branch to the + // head of the region, but the entry node of a function cannot have preds. + BasicBlock *newFuncRoot = + BasicBlock::Create(Context, "newFuncRoot", newFunction); + + // The map of values from the original function to the corresponding values in + // the extracted function; only used with KeepOldBlocks. + ValueToValueMapTy VMap; + + // Additional instructions not in a extracted block whose operands need to be + // remapped. 
+ SmallVector AdditionalRemap; + + // Copy or move (depending on KeepOldBlocks) an instruction to the new + // function. + auto MoveOrCopyInst = [this, newFuncRoot](Instruction *I) -> Instruction * { + BasicBlock::iterator IP = newFuncRoot->getFirstInsertionPt(); + if (!KeepOldBlocks) { + I->moveBefore(*newFuncRoot, IP); + return I; + } + + Instruction *ClonedI = I->clone(); + ClonedI->setName(I->getName()); + newFuncRoot->getInstList().insert(IP, ClonedI); + return ClonedI; + }; + + // Now sink all instructions which only have non-phi uses inside the region. + // Group the allocas at the start of the block, so that any bitcast uses of + // the allocas are well-defined. + for (Value *II : SinkingCands) { + if (!isa(II)) { + Instruction *New = MoveOrCopyInst(cast(II)); + if (KeepOldBlocks) { + AdditionalRemap.push_back(New); + VMap[II] = New; + } + } + } + for (Value *II : SinkingCands) { + if (auto *AI = dyn_cast(II)) { + AI = cast(MoveOrCopyInst(AI)); + if (KeepOldBlocks) { + AdditionalRemap.push_back(AI); + VMap[II] = AI; + } + } + } + + // Create an iterator to name all of the arguments we inserted. + Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. 
+ SmallVector NewValues; + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + Instruction *TI = newFunction->begin()->getTerminator(); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructArgTy->getElementType(i), GEP, + "loadgep_" + inputs[i]->getName(), TI); + } else + RewriteVal = &*AI++; + + NewValues.push_back(RewriteVal); + } + + if (KeepOldBlocks) { + // Copy blocks and instrutions to newFunction. + for (BasicBlock *Block : Blocks) { + BasicBlock *CBB = CloneBasicBlock( + Block, VMap, {}, newFunction, /* CodeInfo */ nullptr, + /* DIFinder */ nullptr, + [](const Instruction *I) -> bool { return !isa(I); }); + + // Add basic block mapping. + VMap[Block] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (Block->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(oldFunction, Block); + VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); + } + + // Non-header block may have branches from outside the region. These + // continue to branch to the original blocks, hence remove their PHI + // entries. 
+ if (Block != header) + for (auto &&P : CBB->phis()) { + unsigned NumIncoming = P.getNumIncomingValues(); + for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { + BasicBlock *IncomingBlock = P.getIncomingBlock(Idx); + if (Blocks.count(IncomingBlock)) + continue; + P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/false); + } + } + } + + for (auto P : enumerate(inputs)) + VMap[P.value()] = NewValues[P.index()]; + + } else { + moveCodeToFunction(newFunction); + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal = NewValues[i]; + + std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); + for (User *use : Users) + if (Instruction *inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } + } + + // Create stubs for the original exit blocks. + std::map ExitBlockMap; + for (auto P : enumerate(SwitchCases)) { + BasicBlock *OldTarget = P.value(); + size_t SuccNum = P.index(); + + BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; + assert(!NewTarget && "Switch cases muast be unique"); + + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", + newFunction); + if (KeepOldBlocks) + VMap[OldTarget] = NewTarget; + + Value *brVal = nullptr; + assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); + switch (NumExitBlocks) { + case 0: + case 1: + break; // No value needed. 
+ case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } + + ReturnInst::Create(Context, brVal, NewTarget); + } + + for (BasicBlock *Block : Blocks) { + Instruction *TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; + BasicBlock *OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock *NewTarget = ExitBlockMap[OldTarget]; + assert(NewTarget && "Unknown target block!"); + + if (!KeepOldBlocks) { + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } else { + VMap[OldTarget] = NewTarget; + } + } + } + + // Update values references to point to the new function. + if (KeepOldBlocks) { + for (BasicBlock *Pred : predecessors(header)) { + if (VMap.count(Pred)) + continue; + VMap[Pred] = newFuncRoot; + } + + for (Instruction *II : AdditionalRemap) + RemapInstruction(II, VMap, RF_NoModuleLevelChanges); + + // Loop over all of the instructions in the new function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (BasicBlock *Block : Blocks) { + WeakTrackingVH NewBlock = VMap.lookup(Block); + if (!NewBlock) + continue; + + // Loop over all instructions, fixing each one as we find it... + for (Instruction &II : cast(*NewBlock)) + RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); + } + } else { + // Loop over all of the PHI nodes in the header and exit blocks, and change + // any references to the old incoming edge to be the new incoming edge. 
+ for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!Blocks.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); + } + } + + BasicBlock *NewHeader = + KeepOldBlocks ? cast(VMap.lookup(header)) : header; + assert(NewHeader && "Header must have been cloned/moved to newFunction"); + + // Connect newFunction entry block to new header. + BranchInst *BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); + + // Store the arguments right after the definition of output value. + // This should be proceeded after creating exit stubs to be ensure that invoke + // result restore will be placed in the outlined function. + Function::arg_iterator OAI = newFunction->arg_begin() + inputs.size(); + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto *OutI = dyn_cast(outputs[i]); + if (!OutI) + continue; + + if (KeepOldBlocks) + OutI = cast(VMap.lookup(OutI)); + + // Find proper insertion point. + BasicBlock::iterator InsertPt; + // In case OutI is an invoke, we insert the store at the beginning in the + // 'normal destination' BB. Otherwise we insert the store right after OutI. 
+ if (auto *InvokeI = dyn_cast(OutI)) + InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); + else if (auto *Phi = dyn_cast(OutI)) + InsertPt = Phi->getParent()->getFirstInsertionPt(); + else + InsertPt = std::next(OutI->getIterator()); + + Instruction *InsertBefore = &*InsertPt; + assert((InsertBefore->getFunction() == newFunction || + Blocks.count(InsertBefore->getParent())) && + "InsertPt should be in new function"); + assert(OAI != newFunction->arg_end() && + "Number of output arguments should match " + "the amount of defined values"); + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), + InsertBefore); + new StoreInst(OutI, GEP, InsertBefore); + // Since there should be only one struct argument aggregating + // all the output values, we shouldn't increment OAI, which always + // points to the struct argument, in this case. + } else { + new StoreInst(OutI, &*OAI, InsertBefore); + ++OAI; + } + } +} + +CallInst *CodeExtractor::emitReplacerCall( + const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, + StructType *StructArgTy, ArrayRef SwtichCases, + Function *oldFunction, BasicBlock *ReplIP, BlockFrequency EntryFreq, + ArrayRef LifetimesStart, std::vector &Reloads) { + LLVMContext &Context = oldFunction->getContext(); + Module *M = oldFunction->getParent(); + const DataLayout &DL = M->getDataLayout(); + + // This takes place of the original loop + BasicBlock *codeReplacer = + BasicBlock::Create(Context, "codeRepl", oldFunction, ReplIP); + BasicBlock *AllocaBlock = &oldFunction->front(); + + // Update the entry count of the function. 
+ if (BFI) + BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); + + // Add inputs as params, or to be filled into the struct + std::vector params; + AllocaInst *Struct = nullptr; + if (AggregateArgs && StructArgTy) { + std::vector StructValues; + for (Value *input : inputs) { + StructValues.push_back(input); + } + + Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", &AllocaBlock->front()); + + params.push_back(Struct); + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + new StoreInst(StructValues[i], GEP, codeReplacer); + } + } + + std::vector ReloadOutputs; + if (!AggregateArgs) { + for (Value *input : inputs) + params.push_back(input); + + // Create allocas for the outputs + for (Value *output : outputs) { + AllocaInst *alloca = + new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), nullptr, + output->getName() + ".loc", &AllocaBlock->front()); + ReloadOutputs.push_back(alloca); + params.push_back(alloca); + } + } + + // Emit the call to the function + CallInst *call = + CallInst::Create(newFunction, params, + NumExitBlocks > 1 ? "targetBlock" : "", codeReplacer); + + // Set swifterror parameter attributes. + if (!AggregateArgs) { + for (auto P : enumerate(inputs)) { + if (P.value()->isSwiftError()) + call->addParamAttr(P.index(), Attribute::SwiftError); + } + } + + // Add debug location to the new call, if the original function has debug + // info. In that case, the terminator of the entry block of the extracted + // function contains the first debug location of the extracted function, + // set in extractCodeRegion. 
+ if (oldFunction->getSubprogram()) { + if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) + call->setDebugLoc(DL); + } + + // Reload the outputs passed in by reference. + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *Output = nullptr; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } else { + Output = ReloadOutputs[i]; + } + LoadInst *load = + new LoadInst(outputs[i]->getType(), Output, + outputs[i]->getName() + ".reload", codeReplacer); + Reloads.push_back(load); + } + + // Now we can emit a switch statement using the call as a value. + SwitchInst *TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + + for (auto P : enumerate(SwtichCases)) { + BasicBlock *OldTarget = P.value(); + size_t SuccNum = P.index(); + + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), SuccNum), + OldTarget); + } + + // Now that we've done the deed, simplify the switch instruction. 
+ Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy->isVoidTy()) { + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + } else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. + ReturnInst::Create(Context, Constant::getNullValue(OldFnRetTy), + TheSwitch); + } + + TheSwitch->eraseFromParent(); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->eraseFromParent(); + break; + case 2: + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->eraseFromParent(); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. + TheSwitch->setCondition(call); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks - 1)); + break; + } + + // Insert lifetime markers around the reloads of any output values. The + // allocas output values are stored in are only in-use in the codeRepl block. + insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); + + // Replicate the effects of any lifetime start/end markers which referenced + // input objects in the extraction region by placing markers around the call. 
+ insertLifetimeMarkersSurroundingCall(M, LifetimesStart, {}, call); + + return call; +} + +void CodeExtractor::insertReplacerCall( + Function *oldFunction, BasicBlock *header, BasicBlock *codeReplacer, + const ValueSet &outputs, ArrayRef Reloads, + const DenseMap &ExitWeights) { + + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector Users(header->user_begin(), header->user_end()); + for (auto &U : Users) + // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (Instruction *I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction) + I->replaceUsesOfWith(header, codeReplacer); + + if (KeepOldBlocks) { + // Change references to output values after the call to use either the value written by the extracted function or the original value if we skipped the call. Use SSAUpdater to propagate the new PHI since the CFG has changed. + + SSAUpdater SSA; + for (auto P : enumerate(outputs)) { + size_t OutIdx = P.index(); + Instruction* OldVal = cast(P.value()); + Value* NewVal = Reloads[OutIdx]; + + SSA.Initialize(OldVal->getType(), + (OldVal->getName() + ".merge_with_extracted").str()); + SSA.AddAvailableValue(codeReplacer, NewVal); + + // Could help SSAUpdater by determining in advance which output values are + // available in which exit blocks (from DT). 
+ SSA.AddAvailableValue(OldVal->getParent(), OldVal); + + for (Use &U : make_early_inc_range(OldVal->uses())) { + auto* User = dyn_cast(U.getUser()); + if (!User) + continue; + BasicBlock* EffectiveUser = User->getParent(); + if (auto *PHI = dyn_cast(User)) + EffectiveUser = PHI->getIncomingBlock(U); + + + if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) + continue; + + SSA.RewriteUseAfterInsertions(U); + } + } + } else { + // When moving the code region it is sufficient to replace all uses to the extracted function values. Since the original definition's block dominated its use, it will also be dominated by codeReplacer's switch which joined multiple exit blocks. + + for (BasicBlock *ExitBB : ExitBlocks) + for (PHINode &PN : ExitBB->phis()) { + Value *IncomingCodeReplacerVal = nullptr; + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // Ignore incoming values from outside of the extracted region. + if (!Blocks.count(PN.getIncomingBlock(i))) + continue; + + // Ensure that there is only one incoming value from codeReplacer. + if (!IncomingCodeReplacerVal) { + PN.setIncomingBlock(i, codeReplacer); + IncomingCodeReplacerVal = PN.getIncomingValue(i); + } else + assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && + "PHI has two incompatbile incoming values from codeRepl"); + } + } + + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value* load = Reloads[i]; + std::vector Users(outputs[i]->user_begin(), + outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction *inst = cast(Users[u]); + if (inst->getParent()->getParent() == oldFunction) + inst->replaceUsesOfWith(outputs[i], load); + } + } + } + + // Update the branch weights for the exit block. 
+ if (BFI && NumExitBlocks > 1) + calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); +} + bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc, const Function &NewFunc, AssumptionCache *AC) { diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index 46542a983127c..23956db8f9273 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -84,7 +84,8 @@ static cl::list ExtractBlocks( "Specify pairs to extract.\n" "Each pair will create a function.\n" "If multiple basic blocks are specified in one pair,\n" - "the first block in the sequence should dominate the rest (Unlsess using --bb-keep-blocks).\n" + "the first block in the sequence should dominate the rest (Unlsess " + "using --bb-keep-blocks).\n" "eg:\n" " --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n" " --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one " @@ -99,10 +100,13 @@ static cl::opt KeepFunctions( "extracted code is replaced by function call to new function"), cl::cat(ExtractCat)); -static cl::opt - KeepBlocks("bb-keep-blocks", - cl::desc("Keep extracted blocks in original function after outlining. This permits branches to any selected basic block from outside the selection and overlapping code regions, but only branches to the first in the group will call the extracted function."), - cl::cat(ExtractCat)); +static cl::opt KeepBlocks( + "bb-keep-blocks", + cl::desc("Keep extracted blocks in original function after outlining. This " + "permits branches to any selected basic block from outside the " + "selection and overlapping code regions, but only branches to the " + "first in the group will call the extracted function."), + cl::cat(ExtractCat)); // ExtractAlias - The alias to extract from the module. 
static cl::list From bd3a288c363976007f64efcc87d045255187e277 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 20:57:00 -0600 Subject: [PATCH 090/130] cleaning up --- llvm/include/llvm/Transforms/Utils/CodeExtractor.h | 12 ++---------- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 12 ++++-------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index c92cc70d623b4..212273b659980 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -108,17 +108,9 @@ class CodeExtractorAnalysisCache { Type *RetTy; - /// Lists of blocks that are branched from the code region to be extracted. - /// - /// ExitBlocks contains each block at most once. OldTargets is used to - /// determine the extracted function's return code (so its order must be - /// deterministic) but may contain the same block multiple times if branched - /// to from multiple blocks. - /// @{ - SmallVector OldTargets; - SmallPtrSet ExitBlocks; + /// Lists of blocks that are branched from the code region to be extracted. Each block is contained at most once. Its order defines the return value of the extracted function, when leaving the extracted function via the first block it returns 0. When leaving via the second entry it returns 1, etc. SmallVector SwitchCases; - /// @} + // Suffix to use when creating extracted function (appended to the original // function name + "."). 
If empty, the default is to use the entry block diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index bf9f1f4c6350f..60313f2dbb907 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -758,7 +758,7 @@ void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) { /// and other with remaining incoming blocks; then first PHIs are placed in /// outlined region. void CodeExtractor::severSplitPHINodesOfExits() { - for (BasicBlock *ExitBB : ExitBlocks) { + for (BasicBlock *ExitBB : SwitchCases) { BasicBlock *NewBB = nullptr; for (PHINode &PN : ExitBB->phis()) { @@ -1388,7 +1388,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); } - for (BasicBlock *Succ : ExitBlocks) { + for (BasicBlock *Succ : SwitchCases) { for (BasicBlock *Block : predecessors(Succ)) { if (!Blocks.count(Block)) continue; @@ -1510,18 +1510,14 @@ void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&Header, } void CodeExtractor::recomputeExitBlocks() { - OldTargets.clear(); - ExitBlocks.clear(); SwitchCases.clear(); - - + SmallPtrSet ExitBlocks; for (BasicBlock *Block : Blocks) { for (BasicBlock *Succ : successors(Block)) { if (Blocks.count(Succ)) continue; - OldTargets.push_back(Succ); bool IsNew = ExitBlocks.insert(Succ).second; if (IsNew) SwitchCases.push_back(Succ); @@ -2025,7 +2021,7 @@ void CodeExtractor::insertReplacerCall( } else { // When moving the code region it is sufficient to replace all uses to the extracted function values. Since the original definition's block dominated its use, it will also be dominated by codeReplacer's switch which joined multiple exit blocks. 
- for (BasicBlock *ExitBB : ExitBlocks) + for (BasicBlock *ExitBB : SwitchCases) for (PHINode &PN : ExitBB->phis()) { Value *IncomingCodeReplacerVal = nullptr; for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { From 827cf178113f20b9bcea2b849aaef8d05953783a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 20:57:23 -0600 Subject: [PATCH 091/130] clang-format --- .../llvm/Transforms/Utils/CodeExtractor.h | 7 ++-- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 35 +++++++++---------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 212273b659980..3bfb4f64210dc 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -108,10 +108,13 @@ class CodeExtractorAnalysisCache { Type *RetTy; - /// Lists of blocks that are branched from the code region to be extracted. Each block is contained at most once. Its order defines the return value of the extracted function, when leaving the extracted function via the first block it returns 0. When leaving via the second entry it returns 1, etc. + /// Lists of blocks that are branched from the code region to be extracted. + /// Each block is contained at most once. Its order defines the return value + /// of the extracted function, when leaving the extracted function via the + /// first block it returns 0. When leaving via the second entry it returns + /// 1, etc. SmallVector SwitchCases; - // Suffix to use when creating extracted function (appended to the original // function name + "."). If empty, the default is to use the entry block // label, if non-empty, otherwise "extracted". 
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 60313f2dbb907..2440e23de66d6 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1403,14 +1403,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // to the new function. BasicBlock *ReplIP = header; if (!KeepOldBlocks) { - while (ReplIP && Blocks.count(ReplIP)) + while (ReplIP && Blocks.count(ReplIP)) ReplIP = ReplIP->getNextNode(); } - - - - // Construct new function based on inputs/outputs & add allocas for all defs. std::string SuffixToUse = Suffix.empty() @@ -1460,7 +1456,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, report_fatal_error("verification of newFunction failed!"); }); LLVM_DEBUG(if (verifyFunction(*oldFunction)) - report_fatal_error("verification of oldFunction failed!")); + report_fatal_error("verification of oldFunction failed!")); LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC)) report_fatal_error("Stale Asumption cache for old Function!")); return newFunction; @@ -1519,14 +1515,13 @@ void CodeExtractor::recomputeExitBlocks() { continue; bool IsNew = ExitBlocks.insert(Succ).second; - if (IsNew) - SwitchCases.push_back(Succ); + if (IsNew) + SwitchCases.push_back(Succ); } } NumExitBlocks = ExitBlocks.size(); } - void CodeExtractor::emitFunctionBody( const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, StructType *StructArgTy, ArrayRef SwitchCases, @@ -1987,13 +1982,15 @@ void CodeExtractor::insertReplacerCall( I->replaceUsesOfWith(header, codeReplacer); if (KeepOldBlocks) { - // Change references to output values after the call to use either the value written by the extracted function or the original value if we skipped the call. Use SSAUpdater to propagate the new PHI since the CFG has changed. 
+ // Change references to output values after the call to use either the value + // written by the extracted function or the original value if we skipped the + // call. Use SSAUpdater to propagate the new PHI since the CFG has changed. SSAUpdater SSA; for (auto P : enumerate(outputs)) { size_t OutIdx = P.index(); - Instruction* OldVal = cast(P.value()); - Value* NewVal = Reloads[OutIdx]; + Instruction *OldVal = cast(P.value()); + Value *NewVal = Reloads[OutIdx]; SSA.Initialize(OldVal->getType(), (OldVal->getName() + ".merge_with_extracted").str()); @@ -2004,13 +2001,12 @@ void CodeExtractor::insertReplacerCall( SSA.AddAvailableValue(OldVal->getParent(), OldVal); for (Use &U : make_early_inc_range(OldVal->uses())) { - auto* User = dyn_cast(U.getUser()); + auto *User = dyn_cast(U.getUser()); if (!User) continue; - BasicBlock* EffectiveUser = User->getParent(); - if (auto *PHI = dyn_cast(User)) + BasicBlock *EffectiveUser = User->getParent(); + if (auto *PHI = dyn_cast(User)) EffectiveUser = PHI->getIncomingBlock(U); - if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) continue; @@ -2019,7 +2015,10 @@ void CodeExtractor::insertReplacerCall( } } } else { - // When moving the code region it is sufficient to replace all uses to the extracted function values. Since the original definition's block dominated its use, it will also be dominated by codeReplacer's switch which joined multiple exit blocks. + // When moving the code region it is sufficient to replace all uses to the + // extracted function values. Since the original definition's block + // dominated its use, it will also be dominated by codeReplacer's switch + // which joined multiple exit blocks. 
for (BasicBlock *ExitBB : SwitchCases) for (PHINode &PN : ExitBB->phis()) { @@ -2040,7 +2039,7 @@ void CodeExtractor::insertReplacerCall( } for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value* load = Reloads[i]; + Value *load = Reloads[i]; std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { From c8d067ffccfd70a7d8b08c901cb2e9586bebf9bc Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 21:04:06 -0600 Subject: [PATCH 092/130] Keep only SwitchCases --- .../llvm/Transforms/Utils/CodeExtractor.h | 2 -- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 20 +++++++++---------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 3bfb4f64210dc..b9f7861ba5a57 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -273,13 +273,11 @@ class CodeExtractorAnalysisCache { /// exit. void emitFunctionBody(const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, StructType *StructArgTy, - ArrayRef SwitchCases, BasicBlock *header, const ValueSet &SinkingCands); /// Generates a Basic Block that calls the extracted function. 
CallInst *emitReplacerCall(const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, StructType *StructArgTy, - ArrayRef SwitchCases, Function *oldFunction, BasicBlock *ReplIP, BlockFrequency EntryFreq, ArrayRef LifetimesStart, diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 2440e23de66d6..f458e15fece49 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1422,14 +1422,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, //// CodeGen newFunction implementation ////////////////////////////////////////////////////// - emitFunctionBody(inputs, outputs, newFunction, StructArgTy, SwitchCases, - header, SinkingCands); + emitFunctionBody(inputs, outputs, newFunction, StructArgTy, header, + SinkingCands); //// Codegen newFunction call replacement ///////////////////////////////////////////////// std::vector Reloads; CallInst *call = emitReplacerCall(inputs, outputs, newFunction, StructArgTy, - SwitchCases, oldFunction, ReplIP, EntryFreq, + oldFunction, ReplIP, EntryFreq, LifetimesStart.getArrayRef(), Reloads); BasicBlock *codeReplacer = call->getParent(); @@ -1507,8 +1507,8 @@ void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&Header, void CodeExtractor::recomputeExitBlocks() { SwitchCases.clear(); - SmallPtrSet ExitBlocks; + SmallPtrSet ExitBlocks; for (BasicBlock *Block : Blocks) { for (BasicBlock *Succ : successors(Block)) { if (Blocks.count(Succ)) @@ -1524,8 +1524,7 @@ void CodeExtractor::recomputeExitBlocks() { void CodeExtractor::emitFunctionBody( const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, - StructType *StructArgTy, ArrayRef SwitchCases, - BasicBlock *header, const ValueSet &SinkingCands) { + StructType *StructArgTy, BasicBlock *header, const ValueSet &SinkingCands) { Function *oldFunction = header->getParent(); LLVMContext &Context = oldFunction->getContext(); @@ -1799,9 
+1798,9 @@ void CodeExtractor::emitFunctionBody( CallInst *CodeExtractor::emitReplacerCall( const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, - StructType *StructArgTy, ArrayRef SwtichCases, - Function *oldFunction, BasicBlock *ReplIP, BlockFrequency EntryFreq, - ArrayRef LifetimesStart, std::vector &Reloads) { + StructType *StructArgTy, Function *oldFunction, BasicBlock *ReplIP, + BlockFrequency EntryFreq, ArrayRef LifetimesStart, + std::vector &Reloads) { LLVMContext &Context = oldFunction->getContext(); Module *M = oldFunction->getParent(); const DataLayout &DL = M->getDataLayout(); @@ -1901,8 +1900,7 @@ CallInst *CodeExtractor::emitReplacerCall( SwitchInst *TheSwitch = SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), codeReplacer, 0, codeReplacer); - - for (auto P : enumerate(SwtichCases)) { + for (auto P : enumerate(SwitchCases)) { BasicBlock *OldTarget = P.value(); size_t SuccNum = P.index(); From 35dd8942f6353421d48c9c61ee23e56f55a86a23 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 21:11:20 -0600 Subject: [PATCH 093/130] Undo unnecessary change --- llvm/include/llvm/Transforms/Utils/CodeExtractor.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index b9f7861ba5a57..df4d5a6a88763 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -102,10 +102,7 @@ class CodeExtractorAnalysisCache { // Bits of intermediate state computed at various phases of extraction. SetVector Blocks; - - /// Same as ExitBlocks.size(). unsigned NumExitBlocks = std::numeric_limits::max(); - Type *RetTy; /// Lists of blocks that are branched from the code region to be extracted. 
From d394e0e58d506114a8e8f7e1e874cae26e0babc2 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 21:44:11 -0600 Subject: [PATCH 094/130] cundo changes/format --- llvm/include/llvm/Transforms/IPO.h | 4 +- llvm/include/llvm/Transforms/Utils/Cloning.h | 14 +- .../llvm/Transforms/Utils/CodeExtractor.h | 17 +- llvm/lib/Transforms/IPO/BlockExtractor.cpp | 34 +- llvm/lib/Transforms/Utils/CloneFunction.cpp | 12 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 351 ++++-------------- .../llvm-extract/extract-block-cleanup.ll | 116 ------ .../extract-block-multiple-exits.ll | 200 ---------- .../tools/llvm-extract/extract-block-sink.ll | 67 ---- llvm/test/tools/llvm-extract/extract-block.ll | 10 +- .../extract-blocks-with-groups.ll | 24 +- llvm/tools/llvm-extract/llvm-extract.cpp | 20 +- 12 files changed, 113 insertions(+), 756 deletions(-) delete mode 100644 llvm/test/tools/llvm-extract/extract-block-cleanup.ll delete mode 100644 llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll delete mode 100644 llvm/test/tools/llvm-extract/extract-block-sink.ll diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index d11c27304815d..67b9a93c47b21 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -192,11 +192,11 @@ Pass *createSingleLoopExtractorPass(); ModulePass *createBlockExtractorPass(); ModulePass * createBlockExtractorPass(const SmallVectorImpl &BlocksToExtract, - bool EraseFunctions, bool KeepOldBlocks = false); + bool EraseFunctions); ModulePass * createBlockExtractorPass(const SmallVectorImpl> &GroupsOfBlocksToExtract, - bool EraseFunctions, bool KeepOldBlocks = false); + bool EraseFunctions); /// createStripDeadPrototypesPass - This pass removes any function declarations /// (prototypes) that are not used. 
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index cff5e6bc8daea..5a1f322b20544 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -114,16 +114,10 @@ struct ClonedCodeInfo { /// If you would like to collect additional information about the cloned /// function, you can specify a ClonedCodeInfo object with the optional fifth /// parameter. -/// -/// If you would like to clone only a subset of instructions in the basic block, -/// you can specify a callback returning true only for those instructions that -/// are to be cloned. -BasicBlock * -CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, - const Twine &NameSuffix = "", Function *F = nullptr, - ClonedCodeInfo *CodeInfo = nullptr, - DebugInfoFinder *DIFinder = nullptr, - function_ref InstSelect = {}); +BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, + const Twine &NameSuffix = "", Function *F = nullptr, + ClonedCodeInfo *CodeInfo = nullptr, + DebugInfoFinder *DIFinder = nullptr); /// Return a copy of the specified function and add it to that /// function's module. Also, any references specified in the VMap are changed diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index df4d5a6a88763..780ce9a1ea36c 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -96,10 +96,6 @@ class CodeExtractorAnalysisCache { // If true, varargs functions can be extracted. bool AllowVarArgs; - /// If true, copies the code into the extracted function instead of moving - /// it. - bool KeepOldBlocks; - // Bits of intermediate state computed at various phases of extraction. SetVector Blocks; unsigned NumExitBlocks = std::numeric_limits::max(); @@ -128,17 +124,12 @@ class CodeExtractorAnalysisCache { /// code is extracted, including vastart. 
If AllowAlloca is true, then /// extraction of blocks containing alloca instructions would be possible, /// however code extractor won't validate whether extraction is legal. - /// - /// If KeepOldBlocks is true, the original instances of the extracted region - /// remains in the original function so they can still be branched to from - /// non-extracted blocks. However, only branches to the first block will - /// call the extracted function. CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr, - AssumptionCache *AC = nullptr, bool AllowVarArgs = false, - bool AllowAlloca = false, std::string Suffix = "", - bool KeepOldBlocks = false); + AssumptionCache *AC = nullptr, + bool AllowVarArgs = false, bool AllowAlloca = false, + std::string Suffix = ""); /// Create a code extractor for a loop body. /// @@ -256,7 +247,7 @@ class CodeExtractorAnalysisCache { /// Normalizes the control flow of the extracted regions, such as ensuring /// that the extracted region does not contain a return instruction. - void normalizeCFGForExtraction(BasicBlock *&Header, bool NoExitBlockPHIs); + void normalizeCFGForExtraction(BasicBlock *&Header); /// Generates the function declaration for the function containing the /// extracted code. 
diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 408bb55235d7e..7c178f9a98345 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -43,8 +43,7 @@ static cl::opt namespace { class BlockExtractor { public: - BlockExtractor(bool EraseFunctions, bool KeepOldBlocks = false) - : EraseFunctions(EraseFunctions), KeepOldBlocks(KeepOldBlocks) {} + BlockExtractor(bool EraseFunctions) : EraseFunctions(EraseFunctions) {} bool runOnModule(Module &M); void init(const SmallVectorImpl> &GroupsOfBlocksToExtract) { @@ -61,7 +60,6 @@ class BlockExtractor { private: SmallVector, 4> GroupsOfBlocks; bool EraseFunctions; - bool KeepOldBlocks; /// Map a function name to groups of blocks. SmallVector>, 4> BlocksByName; @@ -77,8 +75,8 @@ class BlockExtractorLegacyPass : public ModulePass { public: static char ID; BlockExtractorLegacyPass(const SmallVectorImpl &BlocksToExtract, - bool EraseFunctions, bool KeepOldBlocks) - : ModulePass(ID), BE(EraseFunctions, KeepOldBlocks) { + bool EraseFunctions) + : ModulePass(ID), BE(EraseFunctions) { // We want one group per element of the input list. 
SmallVector, 4> MassagedGroupsOfBlocks; for (BasicBlock *BB : BlocksToExtract) { @@ -91,14 +89,13 @@ class BlockExtractorLegacyPass : public ModulePass { BlockExtractorLegacyPass(const SmallVectorImpl> &GroupsOfBlocksToExtract, - bool EraseFunctions, bool KeepOldBlocks) - : ModulePass(ID), BE(EraseFunctions, KeepOldBlocks) { + bool EraseFunctions) + : ModulePass(ID), BE(EraseFunctions) { BE.init(GroupsOfBlocksToExtract); } BlockExtractorLegacyPass() - : BlockExtractorLegacyPass(SmallVector(), false, false) { - } + : BlockExtractorLegacyPass(SmallVector(), false) {} }; } // end anonymous namespace @@ -111,17 +108,14 @@ ModulePass *llvm::createBlockExtractorPass() { return new BlockExtractorLegacyPass(); } ModulePass *llvm::createBlockExtractorPass( - const SmallVectorImpl &BlocksToExtract, bool EraseFunctions, - bool KeepOldBlocks) { - return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions, - KeepOldBlocks); + const SmallVectorImpl &BlocksToExtract, bool EraseFunctions) { + return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions); } ModulePass *llvm::createBlockExtractorPass( const SmallVectorImpl> &GroupsOfBlocksToExtract, - bool EraseFunctions, bool KeepOldBlocks) { - return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions, - KeepOldBlocks); + bool EraseFunctions) { + return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions); } /// Gets all of the blocks specified in the input file. 
@@ -229,13 +223,7 @@ bool BlockExtractor::runOnModule(Module &M) { Changed = true; } CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); - Function *F = - CodeExtractor(BlocksToExtractVec, /* DT */ nullptr, - /* AggregateArgs*/ false, /* BFI */ nullptr, - /* BPI */ nullptr, /* AC */ nullptr, - /* AllowVarArgs */ false, /* AllowAlloca */ false, - /* Suffix */ "", KeepOldBlocks) - .extractCodeRegion(CEAC); + Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() << "' in: " << F->getName() << '\n'); diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index c08adec77445f..048e691e33cf1 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -40,11 +40,10 @@ using namespace llvm; #define DEBUG_TYPE "clone-function" /// See comments in Cloning.h. -BasicBlock * -llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, - const Twine &NameSuffix, Function *F, - ClonedCodeInfo *CodeInfo, DebugInfoFinder *DIFinder, - function_ref InstSelect) { +BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, + const Twine &NameSuffix, Function *F, + ClonedCodeInfo *CodeInfo, + DebugInfoFinder *DIFinder) { BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) NewBB->setName(BB->getName() + NameSuffix); @@ -54,9 +53,6 @@ llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, // Loop over all instructions, and copy them over. 
for (const Instruction &I : *BB) { - if (InstSelect && !InstSelect(&I)) - continue; - if (DIFinder && TheModule) DIFinder->processInstruction(*TheModule, I); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index f458e15fece49..5c58792e4b390 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -61,9 +61,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include #include #include @@ -199,8 +197,7 @@ static bool isBlockValidForExtraction(const BasicBlock &BB, /// Build a set of blocks to extract if the input blocks are viable. static SetVector buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, - bool AllowVarArgs, bool AllowAlloca, - bool KeepOldBlocks) { + bool AllowVarArgs, bool AllowAlloca) { assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector Result; @@ -232,20 +229,16 @@ buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, } // All blocks other than the first must not have predecessors outside of - // the subgraph which is being extracted. KeepOldBlocks relaxes this - // requirement. - if (!KeepOldBlocks) { - for (auto *PBB : predecessors(BB)) - if (!Result.count(PBB)) { - LLVM_DEBUG(dbgs() - << "No blocks in this region may have entries from " - "outside the region except for the first block!\n" - << "Problematic source BB: " << BB->getName() << "\n" - << "Problematic destination BB: " << PBB->getName() - << "\n"); - return {}; - } - } + // the subgraph which is being extracted. 
+ for (auto *PBB : predecessors(BB)) + if (!Result.count(PBB)) { + LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from " + "outside the region except for the first block!\n" + << "Problematic source BB: " << BB->getName() << "\n" + << "Problematic destination BB: " << PBB->getName() + << "\n"); + return {}; + } } return Result; @@ -255,12 +248,10 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, bool AllowVarArgs, bool AllowAlloca, - std::string Suffix, bool KeepOldBlocks) + std::string Suffix) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs), - KeepOldBlocks(KeepOldBlocks), - Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca, - KeepOldBlocks)), + Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), Suffix(Suffix) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, @@ -268,11 +259,10 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BranchProbabilityInfo *BPI, AssumptionCache *AC, std::string Suffix) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllowVarArgs(false), KeepOldBlocks(false), + BPI(BPI), AC(AC), AllowVarArgs(false), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, /* AllowVarArgs */ false, - /* AllowAlloca */ false, - /* KeepOldBlocks */ false)), + /* AllowAlloca */ false)), Suffix(Suffix) {} /// definedInRegion - Return true if the specified value is defined in the @@ -659,10 +649,6 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. for (Instruction &II : *BB) { - // Ignore assumptions if not been removed yet. 
- if (isa(II)) - continue; - for (auto &OI : II.operands()) { Value *V = OI; if (!SinkCands.count(V) && definedInCaller(Blocks, V)) @@ -1332,19 +1318,17 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BasicBlock *header = *Blocks.begin(); Function *oldFunction = header->getParent(); - normalizeCFGForExtraction(header, KeepOldBlocks); - - if (!KeepOldBlocks) { - // Transforms/HotColdSplit/stale-assume-in-original-func.ll - // Remove @llvm.assume calls that will be moved to the new function from the - // old function's assumption cache. - for (BasicBlock *Block : Blocks) { - for (Instruction &I : llvm::make_early_inc_range(*Block)) { - if (auto *AI = dyn_cast(&I)) { - if (AC) - AC->unregisterAssumption(AI); - AI->eraseFromParent(); - } + normalizeCFGForExtraction(header); + + // Transforms/HotColdSplit/stale-assume-in-original-func.ll + // Remove @llvm.assume calls that will be moved to the new function from the + // old function's assumption cache. + for (BasicBlock *Block : Blocks) { + for (Instruction &I : llvm::make_early_inc_range(*Block)) { + if (auto *AI = dyn_cast(&I)) { + if (AC) + AC->unregisterAssumption(AI); + AI->eraseFromParent(); } } } @@ -1402,10 +1386,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Determine position for the replacement code. Do so before header is moved // to the new function. BasicBlock *ReplIP = header; - if (!KeepOldBlocks) { - while (ReplIP && Blocks.count(ReplIP)) - ReplIP = ReplIP->getNextNode(); - } + while (ReplIP && Blocks.count(ReplIP)) + ReplIP = ReplIP->getNextNode(); // Construct new function based on inputs/outputs & add allocas for all defs. 
std::string SuffixToUse = @@ -1419,22 +1401,18 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (AggregateArgs && (inputs.size() + outputs.size() > 0)) StructArgTy = cast(newFunction->getArg(0)->getType()); - //// CodeGen newFunction implementation - ////////////////////////////////////////////////////// + emitFunctionBody(inputs, outputs, newFunction, StructArgTy, header, SinkingCands); - //// Codegen newFunction call replacement - ///////////////////////////////////////////////// + std::vector Reloads; CallInst *call = emitReplacerCall(inputs, outputs, newFunction, StructArgTy, oldFunction, ReplIP, EntryFreq, LifetimesStart.getArrayRef(), Reloads); BasicBlock *codeReplacer = call->getParent(); - //// Connect call replacement to CFG - /////////////////////////////////////////////////////////////////////////// insertReplacerCall(oldFunction, header, codeReplacer, outputs, Reloads, ExitWeights); @@ -1462,8 +1440,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, return newFunction; } -void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&Header, - bool NoExitBlockPHIs) { +void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&Header) { // If we have any return instructions in the region, split those blocks so // that the return is not in the region. splitReturnBlocks(); @@ -1477,38 +1454,12 @@ void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&Header, // individually. recomputeExitBlocks(); severSplitPHINodesOfExits(); - - // If the option was given, ensure there are no PHI nodes at all in the exit - // nodes themselves. - if (NoExitBlockPHIs) { - for (BasicBlock *Block : Blocks) { - for (BasicBlock *Succ : make_early_inc_range(successors(Block))) { - if (Blocks.count(Succ)) - continue; - - if (!Succ->getSinglePredecessor()) - Succ = SplitEdge(Block, Succ, DT); - - // Ensure no PHI node in exit block (still possible with single - // predecessor, e.g. 
LCSSA) - while (auto *P = dyn_cast(&Succ->front())) { - assert(P->getNumIncomingValues() == 1); - P->replaceAllUsesWith(P->getIncomingValue(0)); - P->eraseFromParent(); - } - } - } - - // Exit nodes may have changed by SplitEdge. - // TODO: Preserve BPI/BFI for ExitBlocks (so should splitReturnBlocks()) - recomputeExitBlocks(); - } } void CodeExtractor::recomputeExitBlocks() { SwitchCases.clear(); - SmallPtrSet ExitBlocks; + SmallPtrSet ExitBlocks; for (BasicBlock *Block : Blocks) { for (BasicBlock *Succ : successors(Block)) { if (Blocks.count(Succ)) @@ -1533,48 +1484,18 @@ void CodeExtractor::emitFunctionBody( BasicBlock *newFuncRoot = BasicBlock::Create(Context, "newFuncRoot", newFunction); - // The map of values from the original function to the corresponding values in - // the extracted function; only used with KeepOldBlocks. - ValueToValueMapTy VMap; - - // Additional instructions not in a extracted block whose operands need to be - // remapped. - SmallVector AdditionalRemap; - - // Copy or move (depending on KeepOldBlocks) an instruction to the new - // function. - auto MoveOrCopyInst = [this, newFuncRoot](Instruction *I) -> Instruction * { - BasicBlock::iterator IP = newFuncRoot->getFirstInsertionPt(); - if (!KeepOldBlocks) { - I->moveBefore(*newFuncRoot, IP); - return I; - } - - Instruction *ClonedI = I->clone(); - ClonedI->setName(I->getName()); - newFuncRoot->getInstList().insert(IP, ClonedI); - return ClonedI; - }; // Now sink all instructions which only have non-phi uses inside the region. // Group the allocas at the start of the block, so that any bitcast uses of // the allocas are well-defined. 
- for (Value *II : SinkingCands) { + for (auto *II : SinkingCands) { if (!isa(II)) { - Instruction *New = MoveOrCopyInst(cast(II)); - if (KeepOldBlocks) { - AdditionalRemap.push_back(New); - VMap[II] = New; - } + cast(II)->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); } } - for (Value *II : SinkingCands) { + for (auto *II : SinkingCands) { if (auto *AI = dyn_cast(II)) { - AI = cast(MoveOrCopyInst(AI)); - if (KeepOldBlocks) { - AdditionalRemap.push_back(AI); - VMap[II] = AI; - } + AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); } } @@ -1601,58 +1522,16 @@ void CodeExtractor::emitFunctionBody( NewValues.push_back(RewriteVal); } - if (KeepOldBlocks) { - // Copy blocks and instrutions to newFunction. - for (BasicBlock *Block : Blocks) { - BasicBlock *CBB = CloneBasicBlock( - Block, VMap, {}, newFunction, /* CodeInfo */ nullptr, - /* DIFinder */ nullptr, - [](const Instruction *I) -> bool { return !isa(I); }); - - // Add basic block mapping. - VMap[Block] = CBB; - - // It is only legal to clone a function if a block address within that - // function is never referenced outside of the function. Given that, we - // want to map block addresses from the old function to block addresses in - // the clone. (This is different from the generic ValueMapper - // implementation, which generates an invalid blockaddress when - // cloning a function.) - if (Block->hasAddressTaken()) { - Constant *OldBBAddr = BlockAddress::get(oldFunction, Block); - VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); - } - - // Non-header block may have branches from outside the region. These - // continue to branch to the original blocks, hence remove their PHI - // entries. 
- if (Block != header) - for (auto &&P : CBB->phis()) { - unsigned NumIncoming = P.getNumIncomingValues(); - for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { - BasicBlock *IncomingBlock = P.getIncomingBlock(Idx); - if (Blocks.count(IncomingBlock)) - continue; - P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/false); - } - } - } - - for (auto P : enumerate(inputs)) - VMap[P.value()] = NewValues[P.index()]; - - } else { - moveCodeToFunction(newFunction); + moveCodeToFunction(newFunction); - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal = NewValues[i]; + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal = NewValues[i]; - std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User *use : Users) - if (Instruction *inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } + std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); + for (User *use : Users) + if (Instruction *inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); } // Create stubs for the original exit blocks. @@ -1668,8 +1547,6 @@ void CodeExtractor::emitFunctionBody( // destination, create one now! NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", newFunction); - if (KeepOldBlocks) - VMap[OldTarget] = NewTarget; Value *brVal = nullptr; assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); @@ -1698,54 +1575,22 @@ void CodeExtractor::emitFunctionBody( BasicBlock *NewTarget = ExitBlockMap[OldTarget]; assert(NewTarget && "Unknown target block!"); - if (!KeepOldBlocks) { - // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); - } else { - VMap[OldTarget] = NewTarget; - } + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); } } - // Update values references to point to the new function. 
- if (KeepOldBlocks) { - for (BasicBlock *Pred : predecessors(header)) { - if (VMap.count(Pred)) - continue; - VMap[Pred] = newFuncRoot; - } - - for (Instruction *II : AdditionalRemap) - RemapInstruction(II, VMap, RF_NoModuleLevelChanges); - - // Loop over all of the instructions in the new function, fixing up operand - // references as we go. This uses VMap to do all the hard work. - for (BasicBlock *Block : Blocks) { - WeakTrackingVH NewBlock = VMap.lookup(Block); - if (!NewBlock) - continue; - - // Loop over all instructions, fixing each one as we find it... - for (Instruction &II : cast(*NewBlock)) - RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); - } - } else { - // Loop over all of the PHI nodes in the header and exit blocks, and change - // any references to the old incoming edge to be the new incoming edge. - for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { - PHINode *PN = cast(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!Blocks.count(PN->getIncomingBlock(i))) - PN->setIncomingBlock(i, newFuncRoot); - } + // Loop over all of the PHI nodes in the header and exit blocks, and change + // any references to the old incoming edge to be the new incoming edge. + for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!Blocks.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); } - BasicBlock *NewHeader = - KeepOldBlocks ? cast(VMap.lookup(header)) : header; - assert(NewHeader && "Header must have been cloned/moved to newFunction"); - // Connect newFunction entry block to new header. - BranchInst *BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); + BranchInst *BranchI2 = BranchInst::Create(header, newFuncRoot); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); // Store the arguments right after the definition of output value. 
@@ -1757,9 +1602,6 @@ void CodeExtractor::emitFunctionBody( if (!OutI) continue; - if (KeepOldBlocks) - OutI = cast(VMap.lookup(OutI)); - // Find proper insertion point. BasicBlock::iterator InsertPt; // In case OutI is an invoke, we insert the store at the beginning in the @@ -1979,72 +1821,35 @@ void CodeExtractor::insertReplacerCall( if (I->isTerminator() && I->getFunction() == oldFunction) I->replaceUsesOfWith(header, codeReplacer); - if (KeepOldBlocks) { - // Change references to output values after the call to use either the value - // written by the extracted function or the original value if we skipped the - // call. Use SSAUpdater to propagate the new PHI since the CFG has changed. - - SSAUpdater SSA; - for (auto P : enumerate(outputs)) { - size_t OutIdx = P.index(); - Instruction *OldVal = cast(P.value()); - Value *NewVal = Reloads[OutIdx]; - - SSA.Initialize(OldVal->getType(), - (OldVal->getName() + ".merge_with_extracted").str()); - SSA.AddAvailableValue(codeReplacer, NewVal); - - // Could help SSAUpdater by determining in advance which output values are - // available in which exit blocks (from DT). - SSA.AddAvailableValue(OldVal->getParent(), OldVal); - - for (Use &U : make_early_inc_range(OldVal->uses())) { - auto *User = dyn_cast(U.getUser()); - if (!User) - continue; - BasicBlock *EffectiveUser = User->getParent(); - if (auto *PHI = dyn_cast(User)) - EffectiveUser = PHI->getIncomingBlock(U); - - if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) + // When moving the code region it is sufficient to replace all uses to the + // extracted function values. Since the original definition's block + // dominated its use, it will also be dominated by codeReplacer's switch + // which joined multiple exit blocks. 
+ for (BasicBlock *ExitBB : SwitchCases) + for (PHINode &PN : ExitBB->phis()) { + Value *IncomingCodeReplacerVal = nullptr; + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // Ignore incoming values from outside of the extracted region. + if (!Blocks.count(PN.getIncomingBlock(i))) continue; - SSA.RewriteUseAfterInsertions(U); + // Ensure that there is only one incoming value from codeReplacer. + if (!IncomingCodeReplacerVal) { + PN.setIncomingBlock(i, codeReplacer); + IncomingCodeReplacerVal = PN.getIncomingValue(i); + } else + assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && + "PHI has two incompatbile incoming values from codeRepl"); } } - } else { - // When moving the code region it is sufficient to replace all uses to the - // extracted function values. Since the original definition's block - // dominated its use, it will also be dominated by codeReplacer's switch - // which joined multiple exit blocks. - - for (BasicBlock *ExitBB : SwitchCases) - for (PHINode &PN : ExitBB->phis()) { - Value *IncomingCodeReplacerVal = nullptr; - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - // Ignore incoming values from outside of the extracted region. - if (!Blocks.count(PN.getIncomingBlock(i))) - continue; - - // Ensure that there is only one incoming value from codeReplacer. 
- if (!IncomingCodeReplacerVal) { - PN.setIncomingBlock(i, codeReplacer); - IncomingCodeReplacerVal = PN.getIncomingValue(i); - } else - assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && - "PHI has two incompatbile incoming values from codeRepl"); - } - } - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *load = Reloads[i]; - std::vector Users(outputs[i]->user_begin(), - outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast(Users[u]); - if (inst->getParent()->getParent() == oldFunction) - inst->replaceUsesOfWith(outputs[i], load); - } + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *load = Reloads[i]; + std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction *inst = cast(Users[u]); + if (inst->getParent()->getParent() == oldFunction) + inst->replaceUsesOfWith(outputs[i], load); } } diff --git a/llvm/test/tools/llvm-extract/extract-block-cleanup.ll b/llvm/test/tools/llvm-extract/extract-block-cleanup.ll deleted file mode 100644 index bbf656fe696f6..0000000000000 --- a/llvm/test/tools/llvm-extract/extract-block-cleanup.ll +++ /dev/null @@ -1,116 +0,0 @@ -; RUN: llvm-extract -S -bb "foo:region_start;extractonly;cleanup;fallback;region_end" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s - - -; CHECK-LABEL: define void @foo(i32* %arg, i1 %c) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %c, label %codeRepl, label %outsideonly -; CHECK-EMPTY: -; CHECK-NEXT: outsideonly: -; CHECK-NEXT: store i32 0, i32* %arg, align 4 -; CHECK-NEXT: br label %cleanup -; CHECK-EMPTY: -; CHECK-NEXT: codeRepl: -; CHECK-NEXT: %targetBlock = call i1 @foo.region_start(i32* %arg) -; CHECK-NEXT: br i1 %targetBlock, label %cleanup.return_crit_edge, label %region_end.split -; CHECK-EMPTY: -; CHECK-NEXT: region_start: -; CHECK-NEXT: br label %extractonly -; CHECK-EMPTY: -; CHECK-NEXT: extractonly: -; CHECK-NEXT: 
store i32 1, i32* %arg, align 4 -; CHECK-NEXT: br label %cleanup -; CHECK-EMPTY: -; CHECK-NEXT: cleanup: -; CHECK-NEXT: %dest = phi i8 [ 0, %outsideonly ], [ 1, %extractonly ] -; CHECK-NEXT: switch i8 %dest, label %fallback [ -; CHECK-NEXT: i8 0, label %cleanup.return_crit_edge -; CHECK-NEXT: i8 1, label %region_end -; CHECK-NEXT: ] -; CHECK-EMPTY: -; CHECK-NEXT: cleanup.return_crit_edge: -; CHECK-NEXT: br label %return -; CHECK-EMPTY: -; CHECK-NEXT: fallback: -; CHECK-NEXT: unreachable -; CHECK-EMPTY: -; CHECK-NEXT: region_end: -; CHECK-NEXT: br label %region_end.split -; CHECK-EMPTY: -; CHECK-NEXT: region_end.split: -; CHECK-NEXT: br label %return -; CHECK-EMPTY: -; CHECK-NEXT: outsidecont: -; CHECK-NEXT: br label %return -; CHECK-EMPTY: -; CHECK-NEXT: return: -; CHECK-NEXT: ret void -; CHECK-NEXT: } - - -; CHECK-LABEL: define internal i1 @foo.region_start(i32* %arg) { -; CHECK-NEXT: newFuncRoot: -; CHECK-NEXT: br label %region_start -; CHECK-EMPTY: -; CHECK-NEXT: region_start: -; CHECK-NEXT: br label %extractonly -; CHECK-EMPTY: -; CHECK-NEXT: extractonly: -; CHECK-NEXT: store i32 1, i32* %arg, align 4 -; CHECK-NEXT: br label %cleanup -; CHECK-EMPTY: -; CHECK-NEXT: cleanup: -; CHECK-NEXT: %dest = phi i8 [ 1, %extractonly ] -; CHECK-NEXT: switch i8 %dest, label %fallback [ -; CHECK-NEXT: i8 0, label %cleanup.return_crit_edge.exitStub -; CHECK-NEXT: i8 1, label %region_end -; CHECK-NEXT: ] -; CHECK-EMPTY: -; CHECK-NEXT: fallback: -; CHECK-NEXT: unreachable -; CHECK-EMPTY: -; CHECK-NEXT: region_end: -; CHECK-NEXT: br label %region_end.split.exitStub -; CHECK-EMPTY: -; CHECK-NEXT: cleanup.return_crit_edge.exitStub: -; CHECK-NEXT: ret i1 true -; CHECK-EMPTY: -; CHECK-NEXT: region_end.split.exitStub: -; CHECK-NEXT: ret i1 false -; CHECK-NEXT: } - - - -define void @foo(i32* %arg, i1 %c) { -entry: - br i1 %c, label %region_start, label %outsideonly - -outsideonly: - store i32 0, i32* %arg, align 4 - br label %cleanup - -region_start: - br label %extractonly - 
-extractonly: - store i32 1, i32* %arg, align 4 - br label %cleanup - -cleanup: - %dest = phi i8 [0, %outsideonly], [1, %extractonly] - switch i8 %dest, label %fallback [ - i8 0, label %return - i8 1, label %region_end - ] - -fallback: - unreachable - -region_end: - br label %return - -outsidecont: - br label %return - -return: - ret void -} diff --git a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll deleted file mode 100644 index b4c0667b9a58d..0000000000000 --- a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll +++ /dev/null @@ -1,200 +0,0 @@ -; RUN: llvm-extract -S -bb "func:region_start;exiting0;exiting1" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s - - -; CHECK-LABEL: define void @func(i32* %arg, i1 %c0, i1 %c1, i1 %c2, i8 %dest) { -; CHECK-NEXT: entry: -; CHECK-NEXT: %B.ce.loc = alloca i32, align 4 -; CHECK-NEXT: %c.loc = alloca i32, align 4 -; CHECK-NEXT: %b.loc = alloca i32, align 4 -; CHECK-NEXT: %a.loc = alloca i32, align 4 -; CHECK-NEXT: br i1 %c0, label %codeRepl, label %exit -; CHECK-EMPTY: -; CHECK-NEXT: codeRepl: -; CHECK-NEXT: %lt.cast = bitcast i32* %a.loc to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast) -; CHECK-NEXT: %lt.cast1 = bitcast i32* %b.loc to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast1) -; CHECK-NEXT: %lt.cast2 = bitcast i32* %c.loc to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast2) -; CHECK-NEXT: %lt.cast3 = bitcast i32* %B.ce.loc to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast3) -; CHECK-NEXT: %targetBlock = call i16 @func.region_start(i1 %c1, i1 %c2, i8 %dest, i32* %a.loc, i32* %b.loc, i32* %c.loc, i32* %B.ce.loc) -; CHECK-NEXT: %a.reload = load i32, i32* %a.loc, align 4 -; CHECK-NEXT: %b.reload = load i32, i32* %b.loc, align 4 -; CHECK-NEXT: %c.reload = load i32, i32* %c.loc, align 4 -; CHECK-NEXT: %B.ce.reload = 
load i32, i32* %B.ce.loc, align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast1) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast2) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast3) -; CHECK-NEXT: switch i16 %targetBlock, label %exit0 [ -; CHECK-NEXT: i16 0, label %exiting0.exit_crit_edge -; CHECK-NEXT: i16 1, label %fallback -; CHECK-NEXT: i16 2, label %exit1 -; CHECK-NEXT: i16 3, label %exit2 -; CHECK-NEXT: ] -; CHECK-EMPTY: -; CHECK-NEXT: region_start: -; CHECK-NEXT: %a = add i32 42, 1 -; CHECK-NEXT: br i1 %c1, label %exiting0, label %exiting1 -; CHECK-EMPTY: -; CHECK-NEXT: exiting0: -; CHECK-NEXT: %b = add i32 42, 2 -; CHECK-NEXT: br i1 %c2, label %exiting0.exit_crit_edge, label %exit0.split -; CHECK-EMPTY: -; CHECK-NEXT: exiting0.exit_crit_edge: -; CHECK-NEXT: %b.merge_with_extracted7 = phi i32 [ %b.reload, %codeRepl ], [ %b, %exiting0 ] -; CHECK-NEXT: br label %exit -; CHECK-EMPTY: -; CHECK-NEXT: exiting1: -; CHECK-NEXT: %c = add i32 42, 3 -; CHECK-NEXT: switch i8 %dest, label %fallback [ -; CHECK-NEXT: i8 0, label %exit0.split -; CHECK-NEXT: i8 1, label %exit1 -; CHECK-NEXT: i8 2, label %exit2 -; CHECK-NEXT: i8 3, label %exit0.split -; CHECK-NEXT: ] -; CHECK-EMPTY: -; CHECK-NEXT: fallback: -; CHECK-NEXT: unreachable -; CHECK-EMPTY: -; CHECK-NEXT: exit: -; CHECK-NEXT: %A = phi i32 [ 42, %entry ], [ %b.merge_with_extracted7, %exiting0.exit_crit_edge ] -; CHECK-NEXT: store i32 %A, i32* %arg, align 4 -; CHECK-NEXT: br label %return -; CHECK-EMPTY: -; CHECK-NEXT: exit0.split: -; CHECK-NEXT: %b.merge_with_extracted6 = phi i32 [ %b, %exiting0 ], [ undef, %exiting1 ], [ undef, %exiting1 ] -; CHECK-NEXT: %B.ce = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ], [ %a, %exiting1 ] -; CHECK-NEXT: br label %exit0 -; CHECK-EMPTY: -; CHECK-NEXT: exit0: -; CHECK-NEXT: %B.ce.merge_with_extracted = phi i32 [ %B.ce.reload, %codeRepl ], [ %B.ce, 
%exit0.split ] -; CHECK-NEXT: %b.merge_with_extracted = phi i32 [ %b.reload, %codeRepl ], [ %b.merge_with_extracted6, %exit0.split ] -; CHECK-NEXT: %a.merge_with_extracted5 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exit0.split ] -; CHECK-NEXT: store i32 %a.merge_with_extracted5, i32* %arg, align 4 -; CHECK-NEXT: store i32 %B.ce.merge_with_extracted, i32* %arg, align 4 -; CHECK-NEXT: br label %after -; CHECK-EMPTY: -; CHECK-NEXT: exit1: -; CHECK-NEXT: %c.merge_with_extracted8 = phi i32 [ %c.reload, %codeRepl ], [ %c, %exiting1 ] -; CHECK-NEXT: %a.merge_with_extracted4 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exiting1 ] -; CHECK-NEXT: br label %after -; CHECK-EMPTY: -; CHECK-NEXT: exit2: -; CHECK-NEXT: %c.merge_with_extracted = phi i32 [ %c.reload, %codeRepl ], [ %c, %exiting1 ] -; CHECK-NEXT: store i32 %c.merge_with_extracted, i32* %arg, align 4 -; CHECK-NEXT: store i32 %c.merge_with_extracted, i32* %arg, align 4 -; CHECK-NEXT: br label %return -; CHECK-EMPTY: -; CHECK-NEXT: after: -; CHECK-NEXT: %a.merge_with_extracted = phi i32 [ %a.merge_with_extracted5, %exit0 ], [ %a.merge_with_extracted4, %exit1 ] -; CHECK-NEXT: %D = phi i32 [ %b.merge_with_extracted, %exit0 ], [ %c.merge_with_extracted8, %exit1 ] -; CHECK-NEXT: store i32 %a.merge_with_extracted, i32* %arg, align 4 -; CHECK-NEXT: store i32 %D, i32* %arg, align 4 -; CHECK-NEXT: br label %return -; CHECK-EMPTY: -; CHECK-NEXT: return: -; CHECK-NEXT: ret void -; CHECK-NEXT: } - - -; CHECK-LABEL: define internal i16 @func.region_start(i1 %c1, i1 %c2, i8 %dest, i32* %a.out, i32* %b.out, i32* %c.out, i32* %B.ce.out) { -; CHECK-NEXT: newFuncRoot: -; CHECK-NEXT: br label %region_start -; CHECK-EMPTY: -; CHECK-NEXT: region_start: -; CHECK-NEXT: %a = add i32 42, 1 -; CHECK-NEXT: store i32 %a, i32* %a.out, align 4 -; CHECK-NEXT: br i1 %c1, label %exiting0, label %exiting1 -; CHECK-EMPTY: -; CHECK-NEXT: exiting0: -; CHECK-NEXT: %b = add i32 42, 2 -; CHECK-NEXT: store i32 %b, i32* %b.out, align 4 -; CHECK-NEXT: br i1 
%c2, label %exiting0.exit_crit_edge.exitStub, label %exit0.split -; CHECK-EMPTY: -; CHECK-NEXT: exiting1: -; CHECK-NEXT: %c = add i32 42, 3 -; CHECK-NEXT: store i32 %c, i32* %c.out, align 4 -; CHECK-NEXT: switch i8 %dest, label %fallback.exitStub [ -; CHECK-NEXT: i8 0, label %exit0.split -; CHECK-NEXT: i8 1, label %exit1.exitStub -; CHECK-NEXT: i8 2, label %exit2.exitStub -; CHECK-NEXT: i8 3, label %exit0.split -; CHECK-NEXT: ] -; CHECK-EMPTY: -; CHECK-NEXT: exit0.split: -; CHECK-NEXT: %B.ce = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ], [ %a, %exiting1 ] -; CHECK-NEXT: store i32 %B.ce, i32* %B.ce.out, align 4 -; CHECK-NEXT: br label %exit0.exitStub -; CHECK-EMPTY: -; CHECK-NEXT: exiting0.exit_crit_edge.exitStub: -; CHECK-NEXT: ret i16 0 -; CHECK-EMPTY: -; CHECK-NEXT: fallback.exitStub: -; CHECK-NEXT: ret i16 1 -; CHECK-EMPTY: -; CHECK-NEXT: exit1.exitStub: -; CHECK-NEXT: ret i16 2 -; CHECK-EMPTY: -; CHECK-NEXT: exit2.exitStub: -; CHECK-NEXT: ret i16 3 -; CHECK-EMPTY: -; CHECK-NEXT: exit0.exitStub: -; CHECK-NEXT: ret i16 4 -; CHECK-NEXT: } - - -define void @func(i32* %arg, i1 %c0, i1 %c1, i1 %c2, i8 %dest) { -entry: - br i1 %c0, label %region_start, label %exit - -region_start: - %a = add i32 42, 1 - br i1 %c1, label %exiting0, label %exiting1 - -exiting0: - %b = add i32 42, 2 - br i1 %c2, label %exit, label %exit0 - -exiting1: - %c = add i32 42, 3 - switch i8 %dest, label %fallback [ - i8 0, label %exit0 - i8 1, label %exit1 - i8 2, label %exit2 - i8 3, label %exit0 - ] - -fallback: - unreachable - -exit: - %A = phi i32 [ 42, %entry ], [ %b, %exiting0 ] - store i32 %A, i32* %arg - br label %return - -exit0: - %B = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ] , [ %a, %exiting1 ] - store i32 %a, i32* %arg - store i32 %B, i32* %arg - br label %after - -exit1: - br label %after - -exit2: - %C = phi i32 [ %c, %exiting1 ] - store i32 %c, i32* %arg - store i32 %C, i32* %arg - br label %return - -after: - %D = phi i32 [ %b, %exit0 ], [ %c, %exit1 ] - store i32 %a, i32* 
%arg - store i32 %D, i32* %arg - br label %return - -return: - ret void -} diff --git a/llvm/test/tools/llvm-extract/extract-block-sink.ll b/llvm/test/tools/llvm-extract/extract-block-sink.ll deleted file mode 100644 index 2bf743a718c07..0000000000000 --- a/llvm/test/tools/llvm-extract/extract-block-sink.ll +++ /dev/null @@ -1,67 +0,0 @@ -; RUN: llvm-extract -S -bb "foo:region_start" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s - - - -; CHECK-LABEL: define void @foo() { -; CHECK-NEXT: entry: -; CHECK-NEXT: %a = alloca i32, align 4 -; CHECK-NEXT: %b = alloca i32, align 4 -; CHECK-NEXT: br label %codeRepl -; CHECK-EMPTY: -; CHECK-NEXT: codeRepl: -; CHECK-NEXT: call void @foo.region_start(i32* %b) -; CHECK-NEXT: br label %return -; CHECK-EMPTY: -; CHECK-NEXT: region_start: -; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %a) -; CHECK-NEXT: store i32 43, i32* %a, align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %a) -; CHECK-NEXT: store i32 44, i32* %b, align 4 -; CHECK-NEXT: br label %return -; CHECK-EMPTY: -; CHECK-NEXT: return: -; CHECK-NEXT: ret void -; CHECK-NEXT: } - - -; CHECK-LABEL: define internal void @foo.region_start(i32* %b) { -; CHECK-NEXT: newFuncRoot: -; CHECK-NEXT: %a = alloca i32, align 4 -; CHECK-NEXT: br label %region_start -; CHECK-EMPTY: -; CHECK-NEXT: region_start: -; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %a) -; CHECK-NEXT: store i32 43, i32* %a, align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %a) -; CHECK-NEXT: store i32 44, i32* %b, align 4 -; CHECK-NEXT: br label %return.exitStub -; CHECK-EMPTY: -; CHECK-NEXT: return.exitStub: -; CHECK-NEXT: ret void -; CHECK-NEXT: } - - - - - - -declare void @llvm.lifetime.start.p0i32(i64, i32* nocapture) -declare void @llvm.lifetime.end.p0i32(i64, i32* nocapture) - - -define void @foo() { -entry: - %a = alloca i32, align 4 - %b = alloca i32, align 4 - br label %region_start - 
-region_start: - call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %a) - store i32 43, i32* %a - call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %a) - store i32 44, i32* %b - br label %return - -return: - ret void -} diff --git a/llvm/test/tools/llvm-extract/extract-block.ll b/llvm/test/tools/llvm-extract/extract-block.ll index 4849e1e917783..7cf0f16033794 100644 --- a/llvm/test/tools/llvm-extract/extract-block.ll +++ b/llvm/test/tools/llvm-extract/extract-block.ll @@ -1,6 +1,4 @@ -; RUN: llvm-extract -S -bb foo:bb4 %s | FileCheck %s --check-prefixes=CHECK,KILL -; RUN: llvm-extract -S -bb foo:bb4 %s --bb-keep-functions --bb-keep-blocks | FileCheck %s --check-prefixes=CHECK,KEEP - +; RUN: llvm-extract -S -bb foo:bb4 %s | FileCheck %s ; CHECK: declare void @bar() define void @bar() { @@ -14,11 +12,7 @@ bb: ret void } -; KEEP-LABEL: define i32 @foo(i32 %arg) { -; KEEP: call void @foo.bb4 - -; KILL-LABEL: define dso_local void @foo.bb4( -; KEEP-LABEL: define internal void @foo.bb4( +; CHECK: @foo.bb4 ; CHECK: call void @bar() ; CHECK: %tmp5 define i32 @foo(i32 %arg) { diff --git a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll index eda65dc6269df..90c92aa61fd8f 100644 --- a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll +++ b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll @@ -1,19 +1,10 @@ -; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s | FileCheck %s --check-prefixes=CHECK,KILL -; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s --bb-keep-functions --bb-keep-blocks | FileCheck %s --check-prefixes=CHECK,KEEP +; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s | FileCheck %s ; Extract two groups of basic blocks in two different functions. 
-; KEEP-LABEL: define i32 @foo(i32 %arg, i32 %arg1) { -; KEEP: call void @foo.if.split( - -; KEEP-LABEL: define i32 @bar(i32 %arg, i32 %arg1) { -; KEEP: %targetBlock = call i1 @bar.bb14( - - ; The first extracted function is the region composed by the ; blocks if, then, and else from foo. -; KILL-LABEL: define dso_local void @foo.if.split(i32 %arg1, i32 %arg, i32* %tmp.0.ce.out) { -; KEEP-LABEL: define internal void @foo.if.split(i32 %arg1, i32 %arg, i32* %tmp.0.ce.out) { +; CHECK: define dso_local void @foo.if.split(i32 %arg1, i32 %arg, i32* %tmp.0.ce.out) { ; CHECK: newFuncRoot: ; CHECK: br label %if.split ; @@ -34,7 +25,7 @@ ; CHECK: %or.cond = and i1 %tmp5, %tmp8 ; CHECK: br i1 %or.cond, label %then, label %else ; -; CHECK: end.split: +; CHECK: end.split: ; preds = %then, %else ; CHECK: %tmp.0.ce = phi i32 [ %tmp13, %then ], [ %tmp25, %else ] ; CHECK: store i32 %tmp.0.ce, i32* %tmp.0.ce.out ; CHECK: br label %end.exitStub @@ -45,8 +36,7 @@ ; The second extracted function is the region composed by the blocks ; bb14 and bb20 from bar. 
-; KILL-LABEL: define dso_local i1 @bar.bb14(i32 %arg1, i32 %arg, i32* %tmp25.out) { -; KEEP-LABEL: define internal i1 @bar.bb14(i32 %arg1, i32 %arg, i32* %tmp25.out) { +; CHECK: define dso_local i1 @bar.bb14(i32 %arg1, i32 %arg, i32* %tmp25.out) { ; CHECK: newFuncRoot: ; CHECK: br label %bb14 ; @@ -60,14 +50,12 @@ ; CHECK: %tmp24 = sdiv i32 %arg1, 6 ; CHECK: %tmp25 = add nsw i32 %tmp24, %tmp22 ; CHECK: store i32 %tmp25, i32* %tmp25.out -; KILL: br label %bb30.exitStub -; KEEP: br label %bb20.split.exitStub +; CHECK: br label %bb30.exitStub ; ; CHECK: bb26.exitStub: ; preds = %bb14 ; CHECK: ret i1 true ; -; KILL: bb30.exitStub: ; preds = %bb20 -; KEEP: bb20.split.exitStub: +; CHECK: bb30.exitStub: ; preds = %bb20 ; CHECK: ret i1 false ; CHECK: } diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index 23956db8f9273..cb1c4116ff192 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -84,8 +84,7 @@ static cl::list ExtractBlocks( "Specify pairs to extract.\n" "Each pair will create a function.\n" "If multiple basic blocks are specified in one pair,\n" - "the first block in the sequence should dominate the rest (Unlsess " - "using --bb-keep-blocks).\n" + "the first block in the sequence should dominate the rest.\n" "eg:\n" " --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n" " --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one " @@ -93,21 +92,6 @@ static cl::list ExtractBlocks( cl::ZeroOrMore, cl::value_desc("function:bb1[;bb2...]"), cl::cat(ExtractCat)); -static cl::opt KeepFunctions( - "bb-keep-functions", - cl::desc( - "When extracting blocks from functions, keep the original functions; " - "extracted code is replaced by function call to new function"), - cl::cat(ExtractCat)); - -static cl::opt KeepBlocks( - "bb-keep-blocks", - cl::desc("Keep extracted blocks in original function after outlining. 
This " - "permits branches to any selected basic block from outside the " - "selection and overlapping code regions, but only branches to the " - "first in the group will call the extracted function."), - cl::cat(ExtractCat)); - // ExtractAlias - The alias to extract from the module. static cl::list ExtractAliases("alias", cl::desc("Specify alias to extract"), @@ -374,7 +358,7 @@ int main(int argc, char **argv) { } legacy::PassManager PM; - PM.add(createBlockExtractorPass(GroupOfBBs, !KeepFunctions, KeepBlocks)); + PM.add(createBlockExtractorPass(GroupOfBBs, true)); PM.run(*M); } From 0dfd685743f021a3c0e8b8a323cf06ac0ded7f80 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 21:46:41 -0600 Subject: [PATCH 095/130] clang-format --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 5c58792e4b390..978031b11ea73 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1401,19 +1401,15 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (AggregateArgs && (inputs.size() + outputs.size() > 0)) StructArgTy = cast(newFunction->getArg(0)->getType()); - - emitFunctionBody(inputs, outputs, newFunction, StructArgTy, header, SinkingCands); - std::vector Reloads; CallInst *call = emitReplacerCall(inputs, outputs, newFunction, StructArgTy, oldFunction, ReplIP, EntryFreq, LifetimesStart.getArrayRef(), Reloads); BasicBlock *codeReplacer = call->getParent(); - insertReplacerCall(oldFunction, header, codeReplacer, outputs, Reloads, ExitWeights); @@ -1484,18 +1480,18 @@ void CodeExtractor::emitFunctionBody( BasicBlock *newFuncRoot = BasicBlock::Create(Context, "newFuncRoot", newFunction); - // Now sink all instructions which only have non-phi uses inside the region. 
// Group the allocas at the start of the block, so that any bitcast uses of // the allocas are well-defined. for (auto *II : SinkingCands) { if (!isa(II)) { - cast(II)->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); + cast(II)->moveBefore(*newFuncRoot, + newFuncRoot->getFirstInsertionPt()); } } for (auto *II : SinkingCands) { if (auto *AI = dyn_cast(II)) { - AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); + AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); } } From eb8c6a3ba6e3bc9ddd1ac9579ff665f74034051f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 22:36:52 -0600 Subject: [PATCH 096/130] KeepOldBlocks --- llvm/include/llvm/Transforms/IPO.h | 4 +- llvm/include/llvm/Transforms/Utils/Cloning.h | 14 +- .../llvm/Transforms/Utils/CodeExtractor.h | 15 +- llvm/lib/Transforms/IPO/BlockExtractor.cpp | 34 +- llvm/lib/Transforms/Utils/CloneFunction.cpp | 12 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 326 ++++++++++++++---- .../llvm-extract/extract-block-cleanup.ll | 116 +++++++ .../extract-block-multiple-exits.ll | 200 +++++++++++ .../tools/llvm-extract/extract-block-sink.ll | 67 ++++ llvm/test/tools/llvm-extract/extract-block.ll | 10 +- .../extract-blocks-with-groups.ll | 24 +- llvm/tools/llvm-extract/llvm-extract.cpp | 20 +- 12 files changed, 737 insertions(+), 105 deletions(-) create mode 100644 llvm/test/tools/llvm-extract/extract-block-cleanup.ll create mode 100644 llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll create mode 100644 llvm/test/tools/llvm-extract/extract-block-sink.ll diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index 67b9a93c47b21..d11c27304815d 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -192,11 +192,11 @@ Pass *createSingleLoopExtractorPass(); ModulePass *createBlockExtractorPass(); ModulePass * createBlockExtractorPass(const SmallVectorImpl &BlocksToExtract, - bool 
EraseFunctions); + bool EraseFunctions, bool KeepOldBlocks = false); ModulePass * createBlockExtractorPass(const SmallVectorImpl> &GroupsOfBlocksToExtract, - bool EraseFunctions); + bool EraseFunctions, bool KeepOldBlocks = false); /// createStripDeadPrototypesPass - This pass removes any function declarations /// (prototypes) that are not used. diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 5a1f322b20544..cff5e6bc8daea 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -114,10 +114,16 @@ struct ClonedCodeInfo { /// If you would like to collect additional information about the cloned /// function, you can specify a ClonedCodeInfo object with the optional fifth /// parameter. -BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, - const Twine &NameSuffix = "", Function *F = nullptr, - ClonedCodeInfo *CodeInfo = nullptr, - DebugInfoFinder *DIFinder = nullptr); +/// +/// If you would like to clone only a subset of instructions in the basic block, +/// you can specify a callback returning true only for those instructions that +/// are to be cloned. +BasicBlock * +CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, + const Twine &NameSuffix = "", Function *F = nullptr, + ClonedCodeInfo *CodeInfo = nullptr, + DebugInfoFinder *DIFinder = nullptr, + function_ref InstSelect = {}); /// Return a copy of the specified function and add it to that /// function's module. Also, any references specified in the VMap are changed diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 780ce9a1ea36c..0bf59874e6762 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -96,6 +96,10 @@ class CodeExtractorAnalysisCache { // If true, varargs functions can be extracted. 
bool AllowVarArgs; + /// If true, copies the code into the extracted function instead of moving + /// it. + bool KeepOldBlocks; + // Bits of intermediate state computed at various phases of extraction. SetVector Blocks; unsigned NumExitBlocks = std::numeric_limits::max(); @@ -124,12 +128,17 @@ class CodeExtractorAnalysisCache { /// code is extracted, including vastart. If AllowAlloca is true, then /// extraction of blocks containing alloca instructions would be possible, /// however code extractor won't validate whether extraction is legal. + /// + /// If KeepOldBlocks is true, the original instances of the extracted region + /// remains in the original function so they can still be branched to from + /// non-extracted blocks. However, only branches to the first block will + /// call the extracted function. CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr, - AssumptionCache *AC = nullptr, - bool AllowVarArgs = false, bool AllowAlloca = false, - std::string Suffix = ""); + AssumptionCache *AC = nullptr, bool AllowVarArgs = false, + bool AllowAlloca = false, std::string Suffix = "", + bool KeepOldBlocks = false); /// Create a code extractor for a loop body. 
/// diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 7c178f9a98345..408bb55235d7e 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -43,7 +43,8 @@ static cl::opt namespace { class BlockExtractor { public: - BlockExtractor(bool EraseFunctions) : EraseFunctions(EraseFunctions) {} + BlockExtractor(bool EraseFunctions, bool KeepOldBlocks = false) + : EraseFunctions(EraseFunctions), KeepOldBlocks(KeepOldBlocks) {} bool runOnModule(Module &M); void init(const SmallVectorImpl> &GroupsOfBlocksToExtract) { @@ -60,6 +61,7 @@ class BlockExtractor { private: SmallVector, 4> GroupsOfBlocks; bool EraseFunctions; + bool KeepOldBlocks; /// Map a function name to groups of blocks. SmallVector>, 4> BlocksByName; @@ -75,8 +77,8 @@ class BlockExtractorLegacyPass : public ModulePass { public: static char ID; BlockExtractorLegacyPass(const SmallVectorImpl &BlocksToExtract, - bool EraseFunctions) - : ModulePass(ID), BE(EraseFunctions) { + bool EraseFunctions, bool KeepOldBlocks) + : ModulePass(ID), BE(EraseFunctions, KeepOldBlocks) { // We want one group per element of the input list. 
SmallVector, 4> MassagedGroupsOfBlocks; for (BasicBlock *BB : BlocksToExtract) { @@ -89,13 +91,14 @@ class BlockExtractorLegacyPass : public ModulePass { BlockExtractorLegacyPass(const SmallVectorImpl> &GroupsOfBlocksToExtract, - bool EraseFunctions) - : ModulePass(ID), BE(EraseFunctions) { + bool EraseFunctions, bool KeepOldBlocks) + : ModulePass(ID), BE(EraseFunctions, KeepOldBlocks) { BE.init(GroupsOfBlocksToExtract); } BlockExtractorLegacyPass() - : BlockExtractorLegacyPass(SmallVector(), false) {} + : BlockExtractorLegacyPass(SmallVector(), false, false) { + } }; } // end anonymous namespace @@ -108,14 +111,17 @@ ModulePass *llvm::createBlockExtractorPass() { return new BlockExtractorLegacyPass(); } ModulePass *llvm::createBlockExtractorPass( - const SmallVectorImpl &BlocksToExtract, bool EraseFunctions) { - return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions); + const SmallVectorImpl &BlocksToExtract, bool EraseFunctions, + bool KeepOldBlocks) { + return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions, + KeepOldBlocks); } ModulePass *llvm::createBlockExtractorPass( const SmallVectorImpl> &GroupsOfBlocksToExtract, - bool EraseFunctions) { - return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions); + bool EraseFunctions, bool KeepOldBlocks) { + return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions, + KeepOldBlocks); } /// Gets all of the blocks specified in the input file. 
@@ -223,7 +229,13 @@ bool BlockExtractor::runOnModule(Module &M) { Changed = true; } CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); - Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC); + Function *F = + CodeExtractor(BlocksToExtractVec, /* DT */ nullptr, + /* AggregateArgs*/ false, /* BFI */ nullptr, + /* BPI */ nullptr, /* AC */ nullptr, + /* AllowVarArgs */ false, /* AllowAlloca */ false, + /* Suffix */ "", KeepOldBlocks) + .extractCodeRegion(CEAC); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() << "' in: " << F->getName() << '\n'); diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 048e691e33cf1..c08adec77445f 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -40,10 +40,11 @@ using namespace llvm; #define DEBUG_TYPE "clone-function" /// See comments in Cloning.h. -BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, - const Twine &NameSuffix, Function *F, - ClonedCodeInfo *CodeInfo, - DebugInfoFinder *DIFinder) { +BasicBlock * +llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, + const Twine &NameSuffix, Function *F, + ClonedCodeInfo *CodeInfo, DebugInfoFinder *DIFinder, + function_ref InstSelect) { BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) NewBB->setName(BB->getName() + NameSuffix); @@ -53,6 +54,9 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, // Loop over all instructions, and copy them over. 
for (const Instruction &I : *BB) { + if (InstSelect && !InstSelect(&I)) + continue; + if (DIFinder && TheModule) DIFinder->processInstruction(*TheModule, I); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 978031b11ea73..3141792776197 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -61,7 +61,9 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include #include #include @@ -197,7 +199,8 @@ static bool isBlockValidForExtraction(const BasicBlock &BB, /// Build a set of blocks to extract if the input blocks are viable. static SetVector buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, - bool AllowVarArgs, bool AllowAlloca) { + bool AllowVarArgs, bool AllowAlloca, + bool KeepOldBlocks) { assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector Result; @@ -229,16 +232,20 @@ buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, } // All blocks other than the first must not have predecessors outside of - // the subgraph which is being extracted. - for (auto *PBB : predecessors(BB)) - if (!Result.count(PBB)) { - LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from " - "outside the region except for the first block!\n" - << "Problematic source BB: " << BB->getName() << "\n" - << "Problematic destination BB: " << PBB->getName() - << "\n"); - return {}; - } + // the subgraph which is being extracted. KeepOldBlocks relaxes this + // requirement. 
+ if (!KeepOldBlocks) { + for (auto *PBB : predecessors(BB)) + if (!Result.count(PBB)) { + LLVM_DEBUG(dbgs() + << "No blocks in this region may have entries from " + "outside the region except for the first block!\n" + << "Problematic source BB: " << BB->getName() << "\n" + << "Problematic destination BB: " << PBB->getName() + << "\n"); + return {}; + } + } } return Result; @@ -248,10 +255,12 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, bool AllowVarArgs, bool AllowAlloca, - std::string Suffix) + std::string Suffix, bool KeepOldBlocks) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs), - Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), + KeepOldBlocks(KeepOldBlocks), + Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca, + KeepOldBlocks)), Suffix(Suffix) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, @@ -259,10 +268,11 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BranchProbabilityInfo *BPI, AssumptionCache *AC, std::string Suffix) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllowVarArgs(false), + BPI(BPI), AC(AC), AllowVarArgs(false), KeepOldBlocks(false), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, /* AllowVarArgs */ false, - /* AllowAlloca */ false)), + /* AllowAlloca */ false, + /* KeepOldBlocks */ false)), Suffix(Suffix) {} /// definedInRegion - Return true if the specified value is defined in the @@ -649,6 +659,10 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. for (Instruction &II : *BB) { + // Ignore assumptions if not been removed yet. 
+ if (isa(II)) + continue; + for (auto &OI : II.operands()) { Value *V = OI; if (!SinkCands.count(V) && definedInCaller(Blocks, V)) @@ -1320,15 +1334,17 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, normalizeCFGForExtraction(header); - // Transforms/HotColdSplit/stale-assume-in-original-func.ll - // Remove @llvm.assume calls that will be moved to the new function from the - // old function's assumption cache. - for (BasicBlock *Block : Blocks) { - for (Instruction &I : llvm::make_early_inc_range(*Block)) { - if (auto *AI = dyn_cast(&I)) { - if (AC) - AC->unregisterAssumption(AI); - AI->eraseFromParent(); + if (!KeepOldBlocks) { + // Transforms/HotColdSplit/stale-assume-in-original-func.ll + // Remove @llvm.assume calls that will be moved to the new function from the + // old function's assumption cache. + for (BasicBlock *Block : Blocks) { + for (Instruction &I : llvm::make_early_inc_range(*Block)) { + if (auto *AI = dyn_cast(&I)) { + if (AC) + AC->unregisterAssumption(AI); + AI->eraseFromParent(); + } } } } @@ -1386,8 +1402,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Determine position for the replacement code. Do so before header is moved // to the new function. BasicBlock *ReplIP = header; - while (ReplIP && Blocks.count(ReplIP)) - ReplIP = ReplIP->getNextNode(); + if (!KeepOldBlocks) { + while (ReplIP && Blocks.count(ReplIP)) + ReplIP = ReplIP->getNextNode(); + } // Construct new function based on inputs/outputs & add allocas for all defs. std::string SuffixToUse = @@ -1450,6 +1468,31 @@ void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&Header) { // individually. recomputeExitBlocks(); severSplitPHINodesOfExits(); + + // If the option was given, ensure there are no PHI nodes at all in the exit + // nodes themselves. 
+ if (KeepOldBlocks) { + for (BasicBlock *Block : Blocks) { + for (BasicBlock *Succ : make_early_inc_range(successors(Block))) { + if (Blocks.count(Succ)) + continue; + + if (!Succ->getSinglePredecessor()) + Succ = SplitEdge(Block, Succ, DT); + + // Ensure no PHI node in exit block (still possible with single + // predecessor, e.g. LCSSA) + while (auto *P = dyn_cast(&Succ->front())) { + assert(P->getNumIncomingValues() == 1); + P->replaceAllUsesWith(P->getIncomingValue(0)); + P->eraseFromParent(); + } + } + } + + // Exit nodes may have changed by SplitEdge. + recomputeExitBlocks(); + } } void CodeExtractor::recomputeExitBlocks() { @@ -1480,18 +1523,43 @@ void CodeExtractor::emitFunctionBody( BasicBlock *newFuncRoot = BasicBlock::Create(Context, "newFuncRoot", newFunction); + // The map of values from the original function to the corresponding values in + // the extracted function; only used with KeepOldBlocks. + ValueToValueMapTy VMap; + + // Additional instructions not in a extracted block whose operands need to be + // remapped. + SmallVector AdditionalRemap; + + // Copy or move (depending on KeepOldBlocks) an instruction to the new + // function. + auto MoveOrCopyInst = [this, newFuncRoot, &VMap, + &AdditionalRemap](Instruction *I) -> Instruction * { + BasicBlock::iterator IP = newFuncRoot->getFirstInsertionPt(); + if (!KeepOldBlocks) { + I->moveBefore(*newFuncRoot, IP); + return I; + } + + Instruction *ClonedI = I->clone(); + ClonedI->setName(I->getName()); + newFuncRoot->getInstList().insert(IP, ClonedI); + AdditionalRemap.push_back(ClonedI); + VMap[I] = ClonedI; + return ClonedI; + }; + // Now sink all instructions which only have non-phi uses inside the region. // Group the allocas at the start of the block, so that any bitcast uses of // the allocas are well-defined. 
for (auto *II : SinkingCands) { if (!isa(II)) { - cast(II)->moveBefore(*newFuncRoot, - newFuncRoot->getFirstInsertionPt()); + MoveOrCopyInst(cast(II)); } } for (auto *II : SinkingCands) { if (auto *AI = dyn_cast(II)) { - AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); + MoveOrCopyInst(AI); } } @@ -1518,16 +1586,58 @@ void CodeExtractor::emitFunctionBody( NewValues.push_back(RewriteVal); } - moveCodeToFunction(newFunction); + if (KeepOldBlocks) { + // Copy blocks and instrutions to newFunction. + for (BasicBlock *Block : Blocks) { + BasicBlock *CBB = CloneBasicBlock( + Block, VMap, {}, newFunction, /* CodeInfo */ nullptr, + /* DIFinder */ nullptr, + [](const Instruction *I) -> bool { return !isa(I); }); + + // Add basic block mapping. + VMap[Block] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (Block->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(oldFunction, Block); + VMap[OldBBAddr] = BlockAddress::get(newFunction, CBB); + } - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal = NewValues[i]; + // Non-header block may have branches from outside the region. These + // continue to branch to the original blocks, hence remove their PHI + // entries. 
+ if (Block != header) + for (auto &&P : CBB->phis()) { + unsigned NumIncoming = P.getNumIncomingValues(); + for (int Idx = NumIncoming - 1; Idx >= 0; --Idx) { + BasicBlock *IncomingBlock = P.getIncomingBlock(Idx); + if (Blocks.count(IncomingBlock)) + continue; + P.removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/false); + } + } + } - std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User *use : Users) - if (Instruction *inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); + for (auto P : enumerate(inputs)) + VMap[P.value()] = NewValues[P.index()]; + + } else { + moveCodeToFunction(newFunction); + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal = NewValues[i]; + + std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); + for (User *use : Users) + if (Instruction *inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } } // Create stubs for the original exit blocks. @@ -1543,6 +1653,8 @@ void CodeExtractor::emitFunctionBody( // destination, create one now! NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", newFunction); + if (KeepOldBlocks) + VMap[OldTarget] = NewTarget; Value *brVal = nullptr; assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); @@ -1571,22 +1683,54 @@ void CodeExtractor::emitFunctionBody( BasicBlock *NewTarget = ExitBlockMap[OldTarget]; assert(NewTarget && "Unknown target block!"); - // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); + if (!KeepOldBlocks) { + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } else { + VMap[OldTarget] = NewTarget; + } } } - // Loop over all of the PHI nodes in the header and exit blocks, and change - // any references to the old incoming edge to be the new incoming edge. 
- for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { - PHINode *PN = cast(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!Blocks.count(PN->getIncomingBlock(i))) - PN->setIncomingBlock(i, newFuncRoot); + // Update value references to point to the new function. + if (KeepOldBlocks) { + for (BasicBlock *Pred : predecessors(header)) { + if (VMap.count(Pred)) + continue; + VMap[Pred] = newFuncRoot; + } + + for (Instruction *II : AdditionalRemap) + RemapInstruction(II, VMap, RF_NoModuleLevelChanges); + + // Loop over all of the instructions in the new function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (BasicBlock *Block : Blocks) { + WeakTrackingVH NewBlock = VMap.lookup(Block); + if (!NewBlock) + continue; + + // Loop over all instructions, fixing each one as we find it... + for (Instruction &II : cast(*NewBlock)) + RemapInstruction(&II, VMap, RF_NoModuleLevelChanges); + } + } else { + // Loop over all of the PHI nodes in the header and exit blocks, and change + // any references to the old incoming edge to be the new incoming edge. + for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!Blocks.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); + } } + BasicBlock *NewHeader = + KeepOldBlocks ? cast(VMap.lookup(header)) : header; + assert(NewHeader && "Header must have been cloned/moved to newFunction"); + // Connect newFunction entry block to new header. - BranchInst *BranchI2 = BranchInst::Create(header, newFuncRoot); + BranchInst *BranchI2 = BranchInst::Create(NewHeader, newFuncRoot); applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); // Store the arguments right after the definition of output value.
@@ -1598,6 +1742,9 @@ void CodeExtractor::emitFunctionBody( if (!OutI) continue; + if (KeepOldBlocks) + OutI = cast(VMap.lookup(OutI)); + // Find proper insertion point. BasicBlock::iterator InsertPt; // In case OutI is an invoke, we insert the store at the beginning in the @@ -1817,35 +1964,72 @@ void CodeExtractor::insertReplacerCall( if (I->isTerminator() && I->getFunction() == oldFunction) I->replaceUsesOfWith(header, codeReplacer); - // When moving the code region it is sufficient to replace all uses to the - // extracted function values. Since the original definition's block - // dominated its use, it will also be dominated by codeReplacer's switch - // which joined multiple exit blocks. - for (BasicBlock *ExitBB : SwitchCases) - for (PHINode &PN : ExitBB->phis()) { - Value *IncomingCodeReplacerVal = nullptr; - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - // Ignore incoming values from outside of the extracted region. - if (!Blocks.count(PN.getIncomingBlock(i))) + if (KeepOldBlocks) { + // Change references to output values after the call to use either the value + // written by the extracted function or the original value if we skipped the + // call. Use SSAUpdater to propagate the new PHI since the CFG has changed. + + SSAUpdater SSA; + for (auto P : enumerate(outputs)) { + size_t OutIdx = P.index(); + Instruction *OldVal = cast(P.value()); + Value *NewVal = Reloads[OutIdx]; + + SSA.Initialize(OldVal->getType(), + (OldVal->getName() + ".merge_with_extracted").str()); + SSA.AddAvailableValue(codeReplacer, NewVal); + + // Could help SSAUpdater by determining in advance which output values are + // available in which exit blocks (from DT). 
+ SSA.AddAvailableValue(OldVal->getParent(), OldVal); + + for (Use &U : make_early_inc_range(OldVal->uses())) { + auto *User = dyn_cast(U.getUser()); + if (!User) continue; + BasicBlock *EffectiveUser = User->getParent(); + if (auto *PHI = dyn_cast(User)) + EffectiveUser = PHI->getIncomingBlock(U); - // Ensure that there is only one incoming value from codeReplacer. - if (!IncomingCodeReplacerVal) { - PN.setIncomingBlock(i, codeReplacer); - IncomingCodeReplacerVal = PN.getIncomingValue(i); - } else - assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && - "PHI has two incompatbile incoming values from codeRepl"); + if (EffectiveUser == codeReplacer || Blocks.count(EffectiveUser)) + continue; + + SSA.RewriteUseAfterInsertions(U); } } + } else { + // When moving the code region it is sufficient to replace all uses to the + // extracted function values. Since the original definition's block + // dominated its use, it will also be dominated by codeReplacer's switch + // which joined multiple exit blocks. + + for (BasicBlock *ExitBB : SwitchCases) + for (PHINode &PN : ExitBB->phis()) { + Value *IncomingCodeReplacerVal = nullptr; + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // Ignore incoming values from outside of the extracted region. + if (!Blocks.count(PN.getIncomingBlock(i))) + continue; - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *load = Reloads[i]; - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast(Users[u]); - if (inst->getParent()->getParent() == oldFunction) - inst->replaceUsesOfWith(outputs[i], load); + // Ensure that there is only one incoming value from codeReplacer. 
+ if (!IncomingCodeReplacerVal) { + PN.setIncomingBlock(i, codeReplacer); + IncomingCodeReplacerVal = PN.getIncomingValue(i); + } else + assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && + "PHI has two incompatbile incoming values from codeRepl"); + } + } + + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *load = Reloads[i]; + std::vector Users(outputs[i]->user_begin(), + outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction *inst = cast(Users[u]); + if (inst->getParent()->getParent() == oldFunction) + inst->replaceUsesOfWith(outputs[i], load); + } } } diff --git a/llvm/test/tools/llvm-extract/extract-block-cleanup.ll b/llvm/test/tools/llvm-extract/extract-block-cleanup.ll new file mode 100644 index 0000000000000..bbf656fe696f6 --- /dev/null +++ b/llvm/test/tools/llvm-extract/extract-block-cleanup.ll @@ -0,0 +1,116 @@ +; RUN: llvm-extract -S -bb "foo:region_start;extractonly;cleanup;fallback;region_end" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s + + +; CHECK-LABEL: define void @foo(i32* %arg, i1 %c) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %c, label %codeRepl, label %outsideonly +; CHECK-EMPTY: +; CHECK-NEXT: outsideonly: +; CHECK-NEXT: store i32 0, i32* %arg, align 4 +; CHECK-NEXT: br label %cleanup +; CHECK-EMPTY: +; CHECK-NEXT: codeRepl: +; CHECK-NEXT: %targetBlock = call i1 @foo.region_start(i32* %arg) +; CHECK-NEXT: br i1 %targetBlock, label %cleanup.return_crit_edge, label %region_end.split +; CHECK-EMPTY: +; CHECK-NEXT: region_start: +; CHECK-NEXT: br label %extractonly +; CHECK-EMPTY: +; CHECK-NEXT: extractonly: +; CHECK-NEXT: store i32 1, i32* %arg, align 4 +; CHECK-NEXT: br label %cleanup +; CHECK-EMPTY: +; CHECK-NEXT: cleanup: +; CHECK-NEXT: %dest = phi i8 [ 0, %outsideonly ], [ 1, %extractonly ] +; CHECK-NEXT: switch i8 %dest, label %fallback [ +; CHECK-NEXT: i8 0, label %cleanup.return_crit_edge +; CHECK-NEXT: i8 1, label %region_end +; CHECK-NEXT: ] +; CHECK-EMPTY: +; 
CHECK-NEXT: cleanup.return_crit_edge: +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: fallback: +; CHECK-NEXT: unreachable +; CHECK-EMPTY: +; CHECK-NEXT: region_end: +; CHECK-NEXT: br label %region_end.split +; CHECK-EMPTY: +; CHECK-NEXT: region_end.split: +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: outsidecont: +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: return: +; CHECK-NEXT: ret void +; CHECK-NEXT: } + + +; CHECK-LABEL: define internal i1 @foo.region_start(i32* %arg) { +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label %region_start +; CHECK-EMPTY: +; CHECK-NEXT: region_start: +; CHECK-NEXT: br label %extractonly +; CHECK-EMPTY: +; CHECK-NEXT: extractonly: +; CHECK-NEXT: store i32 1, i32* %arg, align 4 +; CHECK-NEXT: br label %cleanup +; CHECK-EMPTY: +; CHECK-NEXT: cleanup: +; CHECK-NEXT: %dest = phi i8 [ 1, %extractonly ] +; CHECK-NEXT: switch i8 %dest, label %fallback [ +; CHECK-NEXT: i8 0, label %cleanup.return_crit_edge.exitStub +; CHECK-NEXT: i8 1, label %region_end +; CHECK-NEXT: ] +; CHECK-EMPTY: +; CHECK-NEXT: fallback: +; CHECK-NEXT: unreachable +; CHECK-EMPTY: +; CHECK-NEXT: region_end: +; CHECK-NEXT: br label %region_end.split.exitStub +; CHECK-EMPTY: +; CHECK-NEXT: cleanup.return_crit_edge.exitStub: +; CHECK-NEXT: ret i1 true +; CHECK-EMPTY: +; CHECK-NEXT: region_end.split.exitStub: +; CHECK-NEXT: ret i1 false +; CHECK-NEXT: } + + + +define void @foo(i32* %arg, i1 %c) { +entry: + br i1 %c, label %region_start, label %outsideonly + +outsideonly: + store i32 0, i32* %arg, align 4 + br label %cleanup + +region_start: + br label %extractonly + +extractonly: + store i32 1, i32* %arg, align 4 + br label %cleanup + +cleanup: + %dest = phi i8 [0, %outsideonly], [1, %extractonly] + switch i8 %dest, label %fallback [ + i8 0, label %return + i8 1, label %region_end + ] + +fallback: + unreachable + +region_end: + br label %return + +outsidecont: + br label %return + +return: + ret void +} diff --git 
a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll new file mode 100644 index 0000000000000..b4c0667b9a58d --- /dev/null +++ b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll @@ -0,0 +1,200 @@ +; RUN: llvm-extract -S -bb "func:region_start;exiting0;exiting1" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s + + +; CHECK-LABEL: define void @func(i32* %arg, i1 %c0, i1 %c1, i1 %c2, i8 %dest) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %B.ce.loc = alloca i32, align 4 +; CHECK-NEXT: %c.loc = alloca i32, align 4 +; CHECK-NEXT: %b.loc = alloca i32, align 4 +; CHECK-NEXT: %a.loc = alloca i32, align 4 +; CHECK-NEXT: br i1 %c0, label %codeRepl, label %exit +; CHECK-EMPTY: +; CHECK-NEXT: codeRepl: +; CHECK-NEXT: %lt.cast = bitcast i32* %a.loc to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast) +; CHECK-NEXT: %lt.cast1 = bitcast i32* %b.loc to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast1) +; CHECK-NEXT: %lt.cast2 = bitcast i32* %c.loc to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast2) +; CHECK-NEXT: %lt.cast3 = bitcast i32* %B.ce.loc to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast3) +; CHECK-NEXT: %targetBlock = call i16 @func.region_start(i1 %c1, i1 %c2, i8 %dest, i32* %a.loc, i32* %b.loc, i32* %c.loc, i32* %B.ce.loc) +; CHECK-NEXT: %a.reload = load i32, i32* %a.loc, align 4 +; CHECK-NEXT: %b.reload = load i32, i32* %b.loc, align 4 +; CHECK-NEXT: %c.reload = load i32, i32* %c.loc, align 4 +; CHECK-NEXT: %B.ce.reload = load i32, i32* %B.ce.loc, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast1) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast2) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast3) +; CHECK-NEXT: switch i16 
%targetBlock, label %exit0 [ +; CHECK-NEXT: i16 0, label %exiting0.exit_crit_edge +; CHECK-NEXT: i16 1, label %fallback +; CHECK-NEXT: i16 2, label %exit1 +; CHECK-NEXT: i16 3, label %exit2 +; CHECK-NEXT: ] +; CHECK-EMPTY: +; CHECK-NEXT: region_start: +; CHECK-NEXT: %a = add i32 42, 1 +; CHECK-NEXT: br i1 %c1, label %exiting0, label %exiting1 +; CHECK-EMPTY: +; CHECK-NEXT: exiting0: +; CHECK-NEXT: %b = add i32 42, 2 +; CHECK-NEXT: br i1 %c2, label %exiting0.exit_crit_edge, label %exit0.split +; CHECK-EMPTY: +; CHECK-NEXT: exiting0.exit_crit_edge: +; CHECK-NEXT: %b.merge_with_extracted7 = phi i32 [ %b.reload, %codeRepl ], [ %b, %exiting0 ] +; CHECK-NEXT: br label %exit +; CHECK-EMPTY: +; CHECK-NEXT: exiting1: +; CHECK-NEXT: %c = add i32 42, 3 +; CHECK-NEXT: switch i8 %dest, label %fallback [ +; CHECK-NEXT: i8 0, label %exit0.split +; CHECK-NEXT: i8 1, label %exit1 +; CHECK-NEXT: i8 2, label %exit2 +; CHECK-NEXT: i8 3, label %exit0.split +; CHECK-NEXT: ] +; CHECK-EMPTY: +; CHECK-NEXT: fallback: +; CHECK-NEXT: unreachable +; CHECK-EMPTY: +; CHECK-NEXT: exit: +; CHECK-NEXT: %A = phi i32 [ 42, %entry ], [ %b.merge_with_extracted7, %exiting0.exit_crit_edge ] +; CHECK-NEXT: store i32 %A, i32* %arg, align 4 +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: exit0.split: +; CHECK-NEXT: %b.merge_with_extracted6 = phi i32 [ %b, %exiting0 ], [ undef, %exiting1 ], [ undef, %exiting1 ] +; CHECK-NEXT: %B.ce = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ], [ %a, %exiting1 ] +; CHECK-NEXT: br label %exit0 +; CHECK-EMPTY: +; CHECK-NEXT: exit0: +; CHECK-NEXT: %B.ce.merge_with_extracted = phi i32 [ %B.ce.reload, %codeRepl ], [ %B.ce, %exit0.split ] +; CHECK-NEXT: %b.merge_with_extracted = phi i32 [ %b.reload, %codeRepl ], [ %b.merge_with_extracted6, %exit0.split ] +; CHECK-NEXT: %a.merge_with_extracted5 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exit0.split ] +; CHECK-NEXT: store i32 %a.merge_with_extracted5, i32* %arg, align 4 +; CHECK-NEXT: store i32 
%B.ce.merge_with_extracted, i32* %arg, align 4 +; CHECK-NEXT: br label %after +; CHECK-EMPTY: +; CHECK-NEXT: exit1: +; CHECK-NEXT: %c.merge_with_extracted8 = phi i32 [ %c.reload, %codeRepl ], [ %c, %exiting1 ] +; CHECK-NEXT: %a.merge_with_extracted4 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exiting1 ] +; CHECK-NEXT: br label %after +; CHECK-EMPTY: +; CHECK-NEXT: exit2: +; CHECK-NEXT: %c.merge_with_extracted = phi i32 [ %c.reload, %codeRepl ], [ %c, %exiting1 ] +; CHECK-NEXT: store i32 %c.merge_with_extracted, i32* %arg, align 4 +; CHECK-NEXT: store i32 %c.merge_with_extracted, i32* %arg, align 4 +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: after: +; CHECK-NEXT: %a.merge_with_extracted = phi i32 [ %a.merge_with_extracted5, %exit0 ], [ %a.merge_with_extracted4, %exit1 ] +; CHECK-NEXT: %D = phi i32 [ %b.merge_with_extracted, %exit0 ], [ %c.merge_with_extracted8, %exit1 ] +; CHECK-NEXT: store i32 %a.merge_with_extracted, i32* %arg, align 4 +; CHECK-NEXT: store i32 %D, i32* %arg, align 4 +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: return: +; CHECK-NEXT: ret void +; CHECK-NEXT: } + + +; CHECK-LABEL: define internal i16 @func.region_start(i1 %c1, i1 %c2, i8 %dest, i32* %a.out, i32* %b.out, i32* %c.out, i32* %B.ce.out) { +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label %region_start +; CHECK-EMPTY: +; CHECK-NEXT: region_start: +; CHECK-NEXT: %a = add i32 42, 1 +; CHECK-NEXT: store i32 %a, i32* %a.out, align 4 +; CHECK-NEXT: br i1 %c1, label %exiting0, label %exiting1 +; CHECK-EMPTY: +; CHECK-NEXT: exiting0: +; CHECK-NEXT: %b = add i32 42, 2 +; CHECK-NEXT: store i32 %b, i32* %b.out, align 4 +; CHECK-NEXT: br i1 %c2, label %exiting0.exit_crit_edge.exitStub, label %exit0.split +; CHECK-EMPTY: +; CHECK-NEXT: exiting1: +; CHECK-NEXT: %c = add i32 42, 3 +; CHECK-NEXT: store i32 %c, i32* %c.out, align 4 +; CHECK-NEXT: switch i8 %dest, label %fallback.exitStub [ +; CHECK-NEXT: i8 0, label %exit0.split +; CHECK-NEXT: i8 1, label 
%exit1.exitStub +; CHECK-NEXT: i8 2, label %exit2.exitStub +; CHECK-NEXT: i8 3, label %exit0.split +; CHECK-NEXT: ] +; CHECK-EMPTY: +; CHECK-NEXT: exit0.split: +; CHECK-NEXT: %B.ce = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ], [ %a, %exiting1 ] +; CHECK-NEXT: store i32 %B.ce, i32* %B.ce.out, align 4 +; CHECK-NEXT: br label %exit0.exitStub +; CHECK-EMPTY: +; CHECK-NEXT: exiting0.exit_crit_edge.exitStub: +; CHECK-NEXT: ret i16 0 +; CHECK-EMPTY: +; CHECK-NEXT: fallback.exitStub: +; CHECK-NEXT: ret i16 1 +; CHECK-EMPTY: +; CHECK-NEXT: exit1.exitStub: +; CHECK-NEXT: ret i16 2 +; CHECK-EMPTY: +; CHECK-NEXT: exit2.exitStub: +; CHECK-NEXT: ret i16 3 +; CHECK-EMPTY: +; CHECK-NEXT: exit0.exitStub: +; CHECK-NEXT: ret i16 4 +; CHECK-NEXT: } + + +define void @func(i32* %arg, i1 %c0, i1 %c1, i1 %c2, i8 %dest) { +entry: + br i1 %c0, label %region_start, label %exit + +region_start: + %a = add i32 42, 1 + br i1 %c1, label %exiting0, label %exiting1 + +exiting0: + %b = add i32 42, 2 + br i1 %c2, label %exit, label %exit0 + +exiting1: + %c = add i32 42, 3 + switch i8 %dest, label %fallback [ + i8 0, label %exit0 + i8 1, label %exit1 + i8 2, label %exit2 + i8 3, label %exit0 + ] + +fallback: + unreachable + +exit: + %A = phi i32 [ 42, %entry ], [ %b, %exiting0 ] + store i32 %A, i32* %arg + br label %return + +exit0: + %B = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ] , [ %a, %exiting1 ] + store i32 %a, i32* %arg + store i32 %B, i32* %arg + br label %after + +exit1: + br label %after + +exit2: + %C = phi i32 [ %c, %exiting1 ] + store i32 %c, i32* %arg + store i32 %C, i32* %arg + br label %return + +after: + %D = phi i32 [ %b, %exit0 ], [ %c, %exit1 ] + store i32 %a, i32* %arg + store i32 %D, i32* %arg + br label %return + +return: + ret void +} diff --git a/llvm/test/tools/llvm-extract/extract-block-sink.ll b/llvm/test/tools/llvm-extract/extract-block-sink.ll new file mode 100644 index 0000000000000..2bf743a718c07 --- /dev/null +++ 
b/llvm/test/tools/llvm-extract/extract-block-sink.ll @@ -0,0 +1,67 @@ +; RUN: llvm-extract -S -bb "foo:region_start" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s + + + +; CHECK-LABEL: define void @foo() { +; CHECK-NEXT: entry: +; CHECK-NEXT: %a = alloca i32, align 4 +; CHECK-NEXT: %b = alloca i32, align 4 +; CHECK-NEXT: br label %codeRepl +; CHECK-EMPTY: +; CHECK-NEXT: codeRepl: +; CHECK-NEXT: call void @foo.region_start(i32* %b) +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: region_start: +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %a) +; CHECK-NEXT: store i32 43, i32* %a, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %a) +; CHECK-NEXT: store i32 44, i32* %b, align 4 +; CHECK-NEXT: br label %return +; CHECK-EMPTY: +; CHECK-NEXT: return: +; CHECK-NEXT: ret void +; CHECK-NEXT: } + + +; CHECK-LABEL: define internal void @foo.region_start(i32* %b) { +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: %a = alloca i32, align 4 +; CHECK-NEXT: br label %region_start +; CHECK-EMPTY: +; CHECK-NEXT: region_start: +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %a) +; CHECK-NEXT: store i32 43, i32* %a, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %a) +; CHECK-NEXT: store i32 44, i32* %b, align 4 +; CHECK-NEXT: br label %return.exitStub +; CHECK-EMPTY: +; CHECK-NEXT: return.exitStub: +; CHECK-NEXT: ret void +; CHECK-NEXT: } + + + + + + +declare void @llvm.lifetime.start.p0i32(i64, i32* nocapture) +declare void @llvm.lifetime.end.p0i32(i64, i32* nocapture) + + +define void @foo() { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + br label %region_start + +region_start: + call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %a) + store i32 43, i32* %a + call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %a) + store i32 44, i32* %b + br label %return + +return: + ret void +} diff --git 
a/llvm/test/tools/llvm-extract/extract-block.ll b/llvm/test/tools/llvm-extract/extract-block.ll index 7cf0f16033794..4849e1e917783 100644 --- a/llvm/test/tools/llvm-extract/extract-block.ll +++ b/llvm/test/tools/llvm-extract/extract-block.ll @@ -1,4 +1,6 @@ -; RUN: llvm-extract -S -bb foo:bb4 %s | FileCheck %s +; RUN: llvm-extract -S -bb foo:bb4 %s | FileCheck %s --check-prefixes=CHECK,KILL +; RUN: llvm-extract -S -bb foo:bb4 %s --bb-keep-functions --bb-keep-blocks | FileCheck %s --check-prefixes=CHECK,KEEP + ; CHECK: declare void @bar() define void @bar() { @@ -12,7 +14,11 @@ bb: ret void } -; CHECK: @foo.bb4 +; KEEP-LABEL: define i32 @foo(i32 %arg) { +; KEEP: call void @foo.bb4 + +; KILL-LABEL: define dso_local void @foo.bb4( +; KEEP-LABEL: define internal void @foo.bb4( ; CHECK: call void @bar() ; CHECK: %tmp5 define i32 @foo(i32 %arg) { diff --git a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll index 90c92aa61fd8f..eda65dc6269df 100644 --- a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll +++ b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll @@ -1,10 +1,19 @@ -; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s | FileCheck %s +; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s | FileCheck %s --check-prefixes=CHECK,KILL +; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s --bb-keep-functions --bb-keep-blocks | FileCheck %s --check-prefixes=CHECK,KEEP ; Extract two groups of basic blocks in two different functions. +; KEEP-LABEL: define i32 @foo(i32 %arg, i32 %arg1) { +; KEEP: call void @foo.if.split( + +; KEEP-LABEL: define i32 @bar(i32 %arg, i32 %arg1) { +; KEEP: %targetBlock = call i1 @bar.bb14( + + ; The first extracted function is the region composed by the ; blocks if, then, and else from foo. 
-; CHECK: define dso_local void @foo.if.split(i32 %arg1, i32 %arg, i32* %tmp.0.ce.out) { +; KILL-LABEL: define dso_local void @foo.if.split(i32 %arg1, i32 %arg, i32* %tmp.0.ce.out) { +; KEEP-LABEL: define internal void @foo.if.split(i32 %arg1, i32 %arg, i32* %tmp.0.ce.out) { ; CHECK: newFuncRoot: ; CHECK: br label %if.split ; @@ -25,7 +34,7 @@ ; CHECK: %or.cond = and i1 %tmp5, %tmp8 ; CHECK: br i1 %or.cond, label %then, label %else ; -; CHECK: end.split: ; preds = %then, %else +; CHECK: end.split: ; CHECK: %tmp.0.ce = phi i32 [ %tmp13, %then ], [ %tmp25, %else ] ; CHECK: store i32 %tmp.0.ce, i32* %tmp.0.ce.out ; CHECK: br label %end.exitStub @@ -36,7 +45,8 @@ ; The second extracted function is the region composed by the blocks ; bb14 and bb20 from bar. -; CHECK: define dso_local i1 @bar.bb14(i32 %arg1, i32 %arg, i32* %tmp25.out) { +; KILL-LABEL: define dso_local i1 @bar.bb14(i32 %arg1, i32 %arg, i32* %tmp25.out) { +; KEEP-LABEL: define internal i1 @bar.bb14(i32 %arg1, i32 %arg, i32* %tmp25.out) { ; CHECK: newFuncRoot: ; CHECK: br label %bb14 ; @@ -50,12 +60,14 @@ ; CHECK: %tmp24 = sdiv i32 %arg1, 6 ; CHECK: %tmp25 = add nsw i32 %tmp24, %tmp22 ; CHECK: store i32 %tmp25, i32* %tmp25.out -; CHECK: br label %bb30.exitStub +; KILL: br label %bb30.exitStub +; KEEP: br label %bb20.split.exitStub ; ; CHECK: bb26.exitStub: ; preds = %bb14 ; CHECK: ret i1 true ; -; CHECK: bb30.exitStub: ; preds = %bb20 +; KILL: bb30.exitStub: ; preds = %bb20 +; KEEP: bb20.split.exitStub: ; CHECK: ret i1 false ; CHECK: } diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index cb1c4116ff192..23956db8f9273 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -84,7 +84,8 @@ static cl::list ExtractBlocks( "Specify pairs to extract.\n" "Each pair will create a function.\n" "If multiple basic blocks are specified in one pair,\n" - "the first block in the sequence should dominate the rest.\n" + "the 
first block in the sequence should dominate the rest (unless " + "using --bb-keep-blocks).\n" "eg:\n" " --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n" " --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one " @@ -92,6 +93,21 @@ static cl::list ExtractBlocks( cl::ZeroOrMore, cl::value_desc("function:bb1[;bb2...]"), cl::cat(ExtractCat)); +static cl::opt KeepFunctions( + "bb-keep-functions", + cl::desc( + "When extracting blocks from functions, keep the original functions; " + "extracted code is replaced by function call to new function"), + cl::cat(ExtractCat)); + +static cl::opt KeepBlocks( + "bb-keep-blocks", + cl::desc("Keep extracted blocks in original function after outlining. This " + "permits branches to any selected basic block from outside the " + "selection and overlapping code regions, but only branches to the " + "first in the group will call the extracted function."), + cl::cat(ExtractCat)); + // ExtractAlias - The alias to extract from the module.
static cl::list ExtractAliases("alias", cl::desc("Specify alias to extract"), @@ -358,7 +374,7 @@ int main(int argc, char **argv) { } legacy::PassManager PM; - PM.add(createBlockExtractorPass(GroupOfBBs, true)); + PM.add(createBlockExtractorPass(GroupOfBBs, !KeepFunctions, KeepBlocks)); PM.run(*M); } From 2702100aad343a239458e5244b7e9b5ae705b044 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 31 Mar 2022 00:10:33 -0500 Subject: [PATCH 097/130] [CodeRefactor] rebase --- .../llvm/Transforms/Utils/CodeExtractor.h | 12 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 979 +++--------------- .../Transforms/Utils/CodeExtractorTest.cpp | 30 +- 3 files changed, 198 insertions(+), 823 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 2059205a99853..f4ea425a309e3 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -262,24 +262,28 @@ class CodeExtractorAnalysisCache { /// Normalizes the control flow of the extracted regions, such as ensuring /// that the extracted region does not contain a return instruction. - void normalizeCFGForExtraction(BasicBlock *&Header); + void normalizeCFGForExtraction(BasicBlock *&header); /// Generates the function declaration for the function containing the /// extracted code. Function *constructFunctionDeclaration(const ValueSet &inputs, const ValueSet &outputs, BlockFrequency EntryFreq, - const Twine &Name); + const Twine &Name, + ValueSet &StructValues, + StructType *&StructTy); /// Generates the code for the extracted function. That is: a prolog, the /// moved or copied code from the original function, and epilogs for each /// exit. 
void emitFunctionBody(const ValueSet &inputs, const ValueSet &outputs, - Function *newFunction, StructType *StructArgTy, - BasicBlock *header, const ValueSet &SinkingCands); + const ValueSet &StructValues, Function *newFunction, + StructType *StructArgTy, BasicBlock *header, + const ValueSet &SinkingCands); /// Generates a Basic Block that calls the extracted function. CallInst *emitReplacerCall(const ValueSet &inputs, const ValueSet &outputs, + const ValueSet &StructValues, Function *newFunction, StructType *StructArgTy, Function *oldFunction, BasicBlock *ReplIP, BlockFrequency EntryFreq, diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 28e8c81d69572..59c6c208375d5 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -806,12 +806,9 @@ void CodeExtractor::splitReturnBlocks() { } } -/// constructFunction - make a function based on inputs and outputs, as follows: -/// f(in0, ..., inN, out0, ..., outN) -Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, - const ValueSet &outputs, - BlockFrequency EntryFreq, - const Twine &Name) { +Function *CodeExtractor::constructFunctionDeclaration( + const ValueSet &inputs, const ValueSet &outputs, BlockFrequency EntryFreq, + const Twine &Name, ValueSet &StructValues, StructType *&StructTy) { LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); @@ -833,9 +830,9 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, break; } + // Assemble the function's parameter lists. 
std::vector ParamTy; std::vector AggParamTy; - ValueSet StructValues; // Add the types of the input values to the function's argument list for (Value *value : inputs) { @@ -865,9 +862,7 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, "Expeced StructValues only with AggregateArgs set"); // Concatenate scalar and aggregate params in ParamTy. - size_t NumScalarParams = ParamTy.size(); - StructType *StructTy = nullptr; - if (AggregateArgs && !AggParamTy.empty()) { + if (!AggParamTy.empty()) { StructTy = StructType::get(M->getContext(), AggParamTy); ParamTy.push_back(PointerType::getUnqual(StructTy)); } @@ -883,33 +878,9 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, RetTy, ParamTy, AllowVarArgs && oldFunction->isVarArg()); // Create the new function -<<<<<<< HEAD Function *newFunction = Function::Create(funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), Name, M); - // If the old function is no-throw, so is the new one. - if (oldFunction->doesNotThrow()) - newFunction->setDoesNotThrow(); - - // Inherit the uwtable attribute if we need to. - if (oldFunction->hasUWTable()) - newFunction->setHasUWTable(); -||||||| fed966f2a456 - Function *newFunction = Function::Create( - funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), - oldFunction->getName() + "." + SuffixToUse, M); - // If the old function is no-throw, so is the new one. - if (oldFunction->doesNotThrow()) - newFunction->setDoesNotThrow(); - - // Inherit the uwtable attribute if we need to. - if (oldFunction->hasUWTable()) - newFunction->setHasUWTable(); -======= - Function *newFunction = Function::Create( - funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), - oldFunction->getName() + "." + SuffixToUse, M); ->>>>>>> main // Propagate personality info to the new function if there is one. 
if (oldFunction->hasPersonalityFn()) @@ -1025,94 +996,28 @@ Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, newFunction->addFnAttr(Attr); } -<<<<<<< HEAD - // Set parameter attributes. - if (!AggregateArgs) { - // Set swifterror parameter attributes. - for (auto P : enumerate(inputs)) - if (P.value()->isSwiftError()) - newFunction->addParamAttr(P.index(), Attribute::SwiftError); - - // Set names for input and output arguments. - Function::arg_iterator AI = newFunction->arg_begin(); - for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) - AI->setName(inputs[i]->getName()); - for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) - AI->setName(outputs[i]->getName()+".out"); -||||||| fed966f2a456 - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); - - // Rewrite all users of the inputs in the extracted region to use the - // arguments (or appropriate addressing into struct) instead. - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); - Instruction *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, - "loadgep_" + inputs[i]->getName(), TI); - } else - RewriteVal = &*AI++; - - std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User *use : Users) - if (Instruction *inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } - - // Set names for input and output arguments. 
- if (!AggregateArgs) { - AI = newFunction->arg_begin(); - for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) - AI->setName(inputs[i]->getName()); - for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) - AI->setName(outputs[i]->getName()+".out"); -======= // Create scalar and aggregate iterators to name all of the arguments we // inserted. Function::arg_iterator ScalarAI = newFunction->arg_begin(); - Function::arg_iterator AggAI = std::next(ScalarAI, NumScalarParams); - // Rewrite all users of the inputs in the extracted region to use the - // arguments (or appropriate addressing into struct) instead. - for (unsigned i = 0, e = inputs.size(), aggIdx = 0; i != e; ++i) { - Value *RewriteVal; - if (AggregateArgs && StructValues.contains(inputs[i])) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx); - Instruction *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, &*AggAI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructTy->getElementType(aggIdx), GEP, - "loadgep_" + inputs[i]->getName(), TI); - ++aggIdx; - } else - RewriteVal = &*ScalarAI++; + // Set names and attributes for input and output arguments. + ScalarAI = newFunction->arg_begin(); + for (Value *input : inputs) { + if (StructValues.contains(input)) + continue; - std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User *use : Users) - if (Instruction *inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); + ScalarAI->setName(input->getName()); + if (input->isSwiftError()) + newFunction->addParamAttr(ScalarAI - newFunction->arg_begin(), + Attribute::SwiftError); + ++ScalarAI; } + for (Value *output : outputs) { + if (StructValues.contains(output)) + continue; - // Set names for input and output arguments. 
- if (NumScalarParams) { - ScalarAI = newFunction->arg_begin(); - for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++ScalarAI) - if (!StructValues.contains(inputs[i])) - ScalarAI->setName(inputs[i]->getName()); - for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++ScalarAI) - if (!StructValues.contains(outputs[i])) - ScalarAI->setName(outputs[i]->getName() + ".out"); ->>>>>>> main + ScalarAI->setName(output->getName() + ".out"); + ++ScalarAI; } // Update the entry count of the function. @@ -1222,592 +1127,12 @@ static void insertLifetimeMarkersSurroundingCall( } } -<<<<<<< HEAD -||||||| fed966f2a456 -/// emitCallAndSwitchStatement - This method sets up the caller side by adding -/// the call instruction, splitting any PHI nodes in the header block as -/// necessary. -CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, - BasicBlock *codeReplacer, - ValueSet &inputs, - ValueSet &outputs) { - // Emit a call to the new function, passing in: *pointer to struct (if - // aggregating parameters), or plan inputs and allocated memory for outputs - std::vector params, StructValues, ReloadOutputs, Reloads; - - Module *M = newFunction->getParent(); - LLVMContext &Context = M->getContext(); - const DataLayout &DL = M->getDataLayout(); - CallInst *call = nullptr; - - // Add inputs as params, or to be filled into the struct - unsigned ArgNo = 0; - SmallVector SwiftErrorArgs; - for (Value *input : inputs) { - if (AggregateArgs) - StructValues.push_back(input); - else { - params.push_back(input); - if (input->isSwiftError()) - SwiftErrorArgs.push_back(ArgNo); - } - ++ArgNo; - } - - // Create allocas for the outputs - for (Value *output : outputs) { - if (AggregateArgs) { - StructValues.push_back(output); - } else { - AllocaInst *alloca = - new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), - nullptr, output->getName() + ".loc", - &codeReplacer->getParent()->front().front()); - ReloadOutputs.push_back(alloca); - params.push_back(alloca); - } - } 
- - StructType *StructArgTy = nullptr; - AllocaInst *Struct = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - std::vector ArgTypes; - for (Value *V : StructValues) - ArgTypes.push_back(V->getType()); - - // Allocate a struct at the beginning of this function - StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); - Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", - &codeReplacer->getParent()->front().front()); - params.push_back(Struct); - - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); - } - } - - // Emit the call to the function - call = CallInst::Create(newFunction, params, - NumExitBlocks > 1 ? "targetBlock" : ""); - // Add debug location to the new call, if the original function has debug - // info. In that case, the terminator of the entry block of the extracted - // function contains the first debug location of the extracted function, - // set in extractCodeRegion. - if (codeReplacer->getParent()->getSubprogram()) { - if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) - call->setDebugLoc(DL); - } - codeReplacer->getInstList().push_back(call); - - // Set swifterror parameter attributes. - for (unsigned SwiftErrArgNo : SwiftErrorArgs) { - call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - } - - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); - - // Reload the outputs passed in by reference. 
- for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = nullptr; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } else { - Output = ReloadOutputs[i]; - } - LoadInst *load = new LoadInst(outputs[i]->getType(), Output, - outputs[i]->getName() + ".reload", - codeReplacer); - Reloads.push_back(load); - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast(Users[u]); - if (!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); - } - } - - // Now we can emit a switch statement using the call as a value. - SwitchInst *TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); - - // Since there may be multiple exits from the original region, make the new - // function return an unsigned, switch on that number. This loop iterates - // over all of the blocks in the extracted region, updating any terminator - // instructions in the to-be-extracted region that branch to blocks that are - // not in the region to be extracted. - std::map ExitBlockMap; - - // Iterate over the previously collected targets, and create new blocks inside - // the function to branch to. - unsigned switchVal = 0; - for (BasicBlock *OldTarget : OldTargets) { - if (Blocks.count(OldTarget)) - continue; - BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) - continue; - - // If we don't already have an exit stub for this non-extracted - // destination, create one now! 
- NewTarget = BasicBlock::Create(Context, - OldTarget->getName() + ".exitStub", - newFunction); - unsigned SuccNum = switchVal++; - - Value *brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: break; // No value needed. - case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); - break; - } - - ReturnInst::Create(Context, brVal, NewTarget); - - // Update the switch instruction. - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), - SuccNum), - OldTarget); - } - - for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock *OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock *NewTarget = ExitBlockMap[OldTarget]; - assert(NewTarget && "Unknown target block!"); - - // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); - } - } - - // Store the arguments right after the definition of output value. - // This should be proceeded after creating exit stubs to be ensure that invoke - // result restore will be placed in the outlined function. - Function::arg_iterator OAI = OutputArgBegin; - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto *OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; - - // Find proper insertion point. - BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. Otherwise we insert the store right after OutI. 
- if (auto *InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); - - Instruction *InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || - Blocks.count(InsertBefore->getParent())) && - "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), - InsertBefore); - new StoreInst(outputs[i], GEP, InsertBefore); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. - } else { - new StoreInst(outputs[i], &*OAI, InsertBefore); - ++OAI; - } - } - - // Now that we've done the deed, simplify the switch instruction. - Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); - switch (NumExitBlocks) { - case 0: - // There are no successors (the block containing the switch itself), which - // means that previously this was the last part of the function, and hence - // this should be rewritten as a `ret' - - // Check if the function should return a value - if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, nullptr, TheSwitch); // Return void - } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { - // return what we have - ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); - } else { - // Otherwise we must have code extracted an unwind or something, just - // return whatever we want. 
- ReturnInst::Create(Context, - Constant::getNullValue(OldFnRetTy), TheSwitch); - } - - TheSwitch->eraseFromParent(); - break; - case 1: - // Only a single destination, change the switch into an unconditional - // branch. - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); - TheSwitch->eraseFromParent(); - break; - case 2: - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), - call, TheSwitch); - TheSwitch->eraseFromParent(); - break; - default: - // Otherwise, make the default destination of the switch instruction be one - // of the other successors. - TheSwitch->setCondition(call); - TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); - // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); - break; - } - - // Insert lifetime markers around the reloads of any output values. The - // allocas output values are stored in are only in-use in the codeRepl block. - insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); - - return call; -} - -======= -/// emitCallAndSwitchStatement - This method sets up the caller side by adding -/// the call instruction, splitting any PHI nodes in the header block as -/// necessary. 
-CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, - BasicBlock *codeReplacer, - ValueSet &inputs, - ValueSet &outputs) { - // Emit a call to the new function, passing in: *pointer to struct (if - // aggregating parameters), or plan inputs and allocated memory for outputs - std::vector params, ReloadOutputs, Reloads; - ValueSet StructValues; - - Module *M = newFunction->getParent(); - LLVMContext &Context = M->getContext(); - const DataLayout &DL = M->getDataLayout(); - CallInst *call = nullptr; - - // Add inputs as params, or to be filled into the struct - unsigned ScalarInputArgNo = 0; - SmallVector SwiftErrorArgs; - for (Value *input : inputs) { - if (AggregateArgs && !ExcludeArgsFromAggregate.contains(input)) - StructValues.insert(input); - else { - params.push_back(input); - if (input->isSwiftError()) - SwiftErrorArgs.push_back(ScalarInputArgNo); - } - ++ScalarInputArgNo; - } - - // Create allocas for the outputs - unsigned ScalarOutputArgNo = 0; - for (Value *output : outputs) { - if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) { - StructValues.insert(output); - } else { - AllocaInst *alloca = - new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), - nullptr, output->getName() + ".loc", - &codeReplacer->getParent()->front().front()); - ReloadOutputs.push_back(alloca); - params.push_back(alloca); - ++ScalarOutputArgNo; - } - } - - StructType *StructArgTy = nullptr; - AllocaInst *Struct = nullptr; - unsigned NumAggregatedInputs = 0; - if (AggregateArgs && !StructValues.empty()) { - std::vector ArgTypes; - for (Value *V : StructValues) - ArgTypes.push_back(V->getType()); - - // Allocate a struct at the beginning of this function - StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); - Struct = new AllocaInst( - StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg", - AllocationBlock ? 
&*AllocationBlock->getFirstInsertionPt() - : &codeReplacer->getParent()->front().front()); - params.push_back(Struct); - - // Store aggregated inputs in the struct. - for (unsigned i = 0, e = StructValues.size(); i != e; ++i) { - if (inputs.contains(StructValues[i])) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); - NumAggregatedInputs++; - } - } - } - - // Emit the call to the function - call = CallInst::Create(newFunction, params, - NumExitBlocks > 1 ? "targetBlock" : ""); - // Add debug location to the new call, if the original function has debug - // info. In that case, the terminator of the entry block of the extracted - // function contains the first debug location of the extracted function, - // set in extractCodeRegion. - if (codeReplacer->getParent()->getSubprogram()) { - if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) - call->setDebugLoc(DL); - } - codeReplacer->getInstList().push_back(call); - - // Set swifterror parameter attributes. - for (unsigned SwiftErrArgNo : SwiftErrorArgs) { - call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - } - - // Reload the outputs passed in by reference, use the struct if output is in - // the aggregate or reload from the scalar argument. 
- for (unsigned i = 0, e = outputs.size(), scalarIdx = 0, - aggIdx = NumAggregatedInputs; - i != e; ++i) { - Value *Output = nullptr; - if (AggregateArgs && StructValues.contains(outputs[i])) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - ++aggIdx; - } else { - Output = ReloadOutputs[scalarIdx]; - ++scalarIdx; - } - LoadInst *load = new LoadInst(outputs[i]->getType(), Output, - outputs[i]->getName() + ".reload", - codeReplacer); - Reloads.push_back(load); - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast(Users[u]); - if (!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); - } - } - - // Now we can emit a switch statement using the call as a value. - SwitchInst *TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); - - // Since there may be multiple exits from the original region, make the new - // function return an unsigned, switch on that number. This loop iterates - // over all of the blocks in the extracted region, updating any terminator - // instructions in the to-be-extracted region that branch to blocks that are - // not in the region to be extracted. - std::map ExitBlockMap; - - // Iterate over the previously collected targets, and create new blocks inside - // the function to branch to. - unsigned switchVal = 0; - for (BasicBlock *OldTarget : OldTargets) { - if (Blocks.count(OldTarget)) - continue; - BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) - continue; - - // If we don't already have an exit stub for this non-extracted - // destination, create one now! 
- NewTarget = BasicBlock::Create(Context, - OldTarget->getName() + ".exitStub", - newFunction); - unsigned SuccNum = switchVal++; - - Value *brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: break; // No value needed. - case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); - break; - } - - ReturnInst::Create(Context, brVal, NewTarget); - - // Update the switch instruction. - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), - SuccNum), - OldTarget); - } - - for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock *OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock *NewTarget = ExitBlockMap[OldTarget]; - assert(NewTarget && "Unknown target block!"); - - // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); - } - } - - // Store the arguments right after the definition of output value. - // This should be proceeded after creating exit stubs to be ensure that invoke - // result restore will be placed in the outlined function. - Function::arg_iterator ScalarOutputArgBegin = newFunction->arg_begin(); - std::advance(ScalarOutputArgBegin, ScalarInputArgNo); - Function::arg_iterator AggOutputArgBegin = newFunction->arg_begin(); - std::advance(AggOutputArgBegin, ScalarInputArgNo + ScalarOutputArgNo); - - for (unsigned i = 0, e = outputs.size(), aggIdx = NumAggregatedInputs; i != e; - ++i) { - auto *OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; - - // Find proper insertion point. 
- BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. Otherwise we insert the store right after OutI. - if (auto *InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); - - Instruction *InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || - Blocks.count(InsertBefore->getParent())) && - "InsertPt should be in new function"); - if (AggregateArgs && StructValues.contains(outputs[i])) { - assert(AggOutputArgBegin != newFunction->arg_end() && - "Number of aggregate output arguments should match " - "the number of defined values"); - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*AggOutputArgBegin, Idx, "gep_" + outputs[i]->getName(), - InsertBefore); - new StoreInst(outputs[i], GEP, InsertBefore); - ++aggIdx; - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment AggOutputArgBegin, which - // always points to the struct argument, in this case. - } else { - assert(ScalarOutputArgBegin != newFunction->arg_end() && - "Number of scalar output arguments should match " - "the number of defined values"); - new StoreInst(outputs[i], &*ScalarOutputArgBegin, InsertBefore); - ++ScalarOutputArgBegin; - } - } - - // Now that we've done the deed, simplify the switch instruction. 
- Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); - switch (NumExitBlocks) { - case 0: - // There are no successors (the block containing the switch itself), which - // means that previously this was the last part of the function, and hence - // this should be rewritten as a `ret' - - // Check if the function should return a value - if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, nullptr, TheSwitch); // Return void - } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { - // return what we have - ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); - } else { - // Otherwise we must have code extracted an unwind or something, just - // return whatever we want. - ReturnInst::Create(Context, - Constant::getNullValue(OldFnRetTy), TheSwitch); - } - - TheSwitch->eraseFromParent(); - break; - case 1: - // Only a single destination, change the switch into an unconditional - // branch. - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); - TheSwitch->eraseFromParent(); - break; - case 2: - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), - call, TheSwitch); - TheSwitch->eraseFromParent(); - break; - default: - // Otherwise, make the default destination of the switch instruction be one - // of the other successors. - TheSwitch->setCondition(call); - TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); - // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); - break; - } - - // Insert lifetime markers around the reloads of any output values. The - // allocas output values are stored in are only in-use in the codeRepl block. 
- insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); - - return call; -} - ->>>>>>> main void CodeExtractor::moveCodeToFunction(Function *newFunction) { - Function *oldFunc = (*Blocks.begin())->getParent(); + Function *oldFunc = Blocks.front()->getParent(); Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); - auto newFuncIt = newFunction->front().getIterator(); + auto newFuncIt = newFunction->begin(); for (BasicBlock *Block : Blocks) { // Delete the basic block from the old function, and the list of blocks oldBlocks.remove(Block); @@ -2010,7 +1335,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, normalizeCFGForExtraction(header); - // Transforms/HotColdSplit/stale-assume-in-original-func.ll // Remove @llvm.assume calls that will be moved to the new function from the // old function's assumption cache. for (BasicBlock *Block : Blocks) { @@ -2028,7 +1352,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); assert(HoistingCands.empty() || CommonExit); - // analysis, after ret splitting (for values returned) // Find inputs to, outputs from the code region. findInputsOutputs(inputs, outputs, SinkingCands); @@ -2040,14 +1363,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); if (!HoistingCands.empty()) { - BasicBlock *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); Instruction *TI = HoistToBlock->getTerminator(); for (auto *II : HoistingCands) cast(II)->moveBefore(TI); recomputeExitBlocks(); } - // CFG/ExitBlocks fixed after here + // CFG/ExitBlocks must not change hereafter // Calculate the entry frequency of the new function before we change the root // block. 
@@ -2067,6 +1390,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, if (!Blocks.count(Block)) continue; + // Update the branch weight for this successor. BlockFrequency &BF = ExitWeights[Succ]; BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); } @@ -2084,26 +1408,25 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Suffix.empty() ? (header->getName().empty() ? "extracted" : header->getName().str()) : Suffix; - Function *newFunction = constructFunctionDeclaration( - inputs, outputs, EntryFreq, oldFunction->getName() + "." + SuffixToUse); - StructType *StructArgTy = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) - StructArgTy = cast(newFunction->getArg(0)->getType()); + ValueSet StructValues; + StructType *StructTy; + Function *newFunction = constructFunctionDeclaration( + inputs, outputs, EntryFreq, oldFunction->getName() + "." + SuffixToUse, + StructValues, StructTy); - emitFunctionBody(inputs, outputs, newFunction, StructArgTy, header, + emitFunctionBody(inputs, outputs, StructValues, newFunction, StructTy, header, SinkingCands); std::vector Reloads; - CallInst *call = emitReplacerCall(inputs, outputs, newFunction, StructArgTy, - oldFunction, ReplIP, EntryFreq, - LifetimesStart.getArrayRef(), Reloads); - BasicBlock *codeReplacer = call->getParent(); + CallInst *TheCall = emitReplacerCall( + inputs, outputs, StructValues, newFunction, StructTy, oldFunction, ReplIP, + EntryFreq, LifetimesStart.getArrayRef(), Reloads); - insertReplacerCall(oldFunction, header, codeReplacer, outputs, Reloads, - ExitWeights); + insertReplacerCall(oldFunction, header, TheCall->getParent(), outputs, + Reloads, ExitWeights); - fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *call); + fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall); // Mark the new function `noreturn` if applicable. 
Terminators which resume // exception propagation are treated as returning instructions. This is to @@ -2126,13 +1449,13 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, return newFunction; } -void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&Header) { +void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&header) { // If we have any return instructions in the region, split those blocks so // that the return is not in the region. splitReturnBlocks(); // If we have to split PHI nodes of the entry or exit blocks, do so now. - severSplitPHINodesOfEntry(Header); + severSplitPHINodesOfEntry(header); // If a PHI in an exit block has multiple invoming values from the outlined // region, create a new PHI for those values within the region such that only @@ -2145,7 +1468,7 @@ void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&Header) { void CodeExtractor::recomputeExitBlocks() { SwitchCases.clear(); - SmallPtrSet ExitBlocks; + SmallPtrSet ExitBlocks; for (BasicBlock *Block : Blocks) { for (BasicBlock *Succ : successors(Block)) { if (Blocks.count(Succ)) @@ -2160,7 +1483,8 @@ void CodeExtractor::recomputeExitBlocks() { } void CodeExtractor::emitFunctionBody( - const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, + const ValueSet &inputs, const ValueSet &outputs, + const ValueSet &StructValues, Function *newFunction, StructType *StructArgTy, BasicBlock *header, const ValueSet &SinkingCands) { Function *oldFunction = header->getParent(); LLVMContext &Context = oldFunction->getContext(); @@ -2185,25 +1509,27 @@ void CodeExtractor::emitFunctionBody( } } - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); + Function::arg_iterator ScalarAI = newFunction->arg_begin(); + Argument *AggArg = StructValues.empty() + ? 
nullptr + : newFunction->getArg(newFunction->arg_size() - 1); // Rewrite all users of the inputs in the extracted region to use the // arguments (or appropriate addressing into struct) instead. SmallVector NewValues; - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + for (unsigned i = 0, e = inputs.size(), aggIdx = 0; i != e; ++i) { Value *RewriteVal; - if (AggregateArgs) { + if (StructValues.contains(inputs[i])) { Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - Instruction *TI = newFunction->begin()->getTerminator(); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructArgTy->getElementType(i), GEP, - "loadgep_" + inputs[i]->getName(), TI); + StructArgTy, AggArg, Idx, "gep_" + inputs[i]->getName(), newFuncRoot); + RewriteVal = new LoadInst(StructArgTy->getElementType(aggIdx), GEP, + "loadgep_" + inputs[i]->getName(), newFuncRoot); + ++aggIdx; } else - RewriteVal = &*AI++; + RewriteVal = &*ScalarAI++; NewValues.push_back(RewriteVal); } @@ -2220,14 +1546,20 @@ void CodeExtractor::emitFunctionBody( inst->replaceUsesOfWith(inputs[i], RewriteVal); } - // Create stubs for the original exit blocks. + // Since there may be multiple exits from the original region, make the new + // function return an unsigned, switch on that number. This loop iterates + // over all of the blocks in the extracted region, updating any terminator + // instructions in the to-be-extracted region that branch to blocks that are + // not in the region to be extracted. std::map ExitBlockMap; + + // Iterate over the previously collected targets, and create new blocks inside + // the function to branch to. 
for (auto P : enumerate(SwitchCases)) { BasicBlock *OldTarget = P.value(); size_t SuccNum = P.index(); BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - assert(!NewTarget && "Switch cases muast be unique"); // If we don't already have an exit stub for this non-extracted // destination, create one now! @@ -2241,7 +1573,8 @@ void CodeExtractor::emitFunctionBody( case 1: break; // No value needed. case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + brVal = ConstantInt::get(Type::getInt1Ty(Context), + !SuccNum); // MK: why the invert? break; default: brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); @@ -2276,56 +1609,66 @@ void CodeExtractor::emitFunctionBody( } // Connect newFunction entry block to new header. - BranchInst *BranchI2 = BranchInst::Create(header, newFuncRoot); - applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); + BranchInst *BranchI = BranchInst::Create(header, newFuncRoot); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI); // Store the arguments right after the definition of output value. // This should be proceeded after creating exit stubs to be ensure that invoke // result restore will be placed in the outlined function. - Function::arg_iterator OAI = newFunction->arg_begin() + inputs.size(); - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto *OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; + ScalarAI = newFunction->arg_begin(); + unsigned AggIdx = 0; - // Find proper insertion point. - BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. Otherwise we insert the store right after OutI. 
- if (auto *InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); + for (Value *Input : inputs) { + if (StructValues.contains(Input)) + ++AggIdx; else - InsertPt = std::next(OutI->getIterator()); + ++ScalarAI; + } - Instruction *InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || + for (Value *Output : outputs) { + // Find proper insertion point. + // In case Output is an invoke, we insert the store at the beginning in the + // 'normal destination' BB. Otherwise we insert the store right after + // Output. + Instruction *InsertBefore = nullptr; + if (auto *InvokeI = dyn_cast(Output)) + InsertBefore = &*InvokeI->getNormalDest()->getFirstInsertionPt(); + else if (auto *Phi = dyn_cast(Output)) + InsertBefore = &*Phi->getParent()->getFirstInsertionPt(); + else if (auto *OutI = dyn_cast(Output)) + InsertBefore = &*std::next(OutI->getIterator()); + + assert((!InsertBefore || InsertBefore->getFunction() == newFunction || Blocks.count(InsertBefore->getParent())) && "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), - InsertBefore); - new StoreInst(OutI, GEP, InsertBefore); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. 
+ + if (StructValues.contains(Output)) { + if (InsertBefore) { + assert(AggArg && "Number of aggregate output arguments should match " + "the number of defined values"); + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), AggIdx); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, AggArg, Idx, "gep_" + Output->getName(), InsertBefore); + new StoreInst(Output, GEP, InsertBefore); + } + ++AggIdx; } else { - new StoreInst(OutI, &*OAI, InsertBefore); - ++OAI; + if (InsertBefore) { + assert(ScalarAI != newFunction->arg_end() && + "Number of scalar output arguments should match " + "the number of defined values"); + new StoreInst(Output, &*ScalarAI, InsertBefore); + } + ++ScalarAI; } } } CallInst *CodeExtractor::emitReplacerCall( - const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, + const ValueSet &inputs, const ValueSet &outputs, + const ValueSet &StructValues, Function *newFunction, StructType *StructArgTy, Function *oldFunction, BasicBlock *ReplIP, BlockFrequency EntryFreq, ArrayRef LifetimesStart, std::vector &Reloads) { @@ -2336,49 +1679,56 @@ CallInst *CodeExtractor::emitReplacerCall( // This takes place of the original loop BasicBlock *codeReplacer = BasicBlock::Create(Context, "codeRepl", oldFunction, ReplIP); - BasicBlock *AllocaBlock = &oldFunction->front(); + BasicBlock *AllocaBlock = + AllocationBlock ? AllocationBlock : &oldFunction->getEntryBlock(); // Update the entry count of the function. 
if (BFI) BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); - // Add inputs as params, or to be filled into the struct std::vector params; - AllocaInst *Struct = nullptr; - if (AggregateArgs && StructArgTy) { - std::vector StructValues; - for (Value *input : inputs) { - StructValues.push_back(input); - } - Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", &AllocaBlock->front()); + // Add inputs as params, or to be filled into the struct + for (Value *input : inputs) { + if (StructValues.contains(input)) + continue; + + params.push_back(input); + } + + // Create allocas for the outputs + std::vector ReloadOutputs; + for (Value *output : outputs) { + if (StructValues.contains(output)) + continue; + + AllocaInst *alloca = new AllocaInst( + output->getType(), DL.getAllocaAddrSpace(), nullptr, + output->getName() + ".loc", &*AllocaBlock->getFirstInsertionPt()); + params.push_back(alloca); + ReloadOutputs.push_back(alloca); + } + AllocaInst *Struct = nullptr; + if (!StructValues.empty()) { + Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", &*AllocaBlock->getFirstInsertionPt()); params.push_back(Struct); - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + unsigned AggIdx = 0; + for (Value *input : inputs) { + if (!StructValues.contains(input)) + continue; + Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), AggIdx); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + StructArgTy, Struct, Idx, "gep_" + input->getName()); codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); - } - } + new StoreInst(input, GEP, codeReplacer); - std::vector ReloadOutputs; - if (!AggregateArgs) { - for (Value *input : inputs) - params.push_back(input); - - // 
Create allocas for the outputs - for (Value *output : outputs) { - AllocaInst *alloca = - new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), nullptr, - output->getName() + ".loc", &AllocaBlock->front()); - ReloadOutputs.push_back(alloca); - params.push_back(alloca); + ++AggIdx; } } @@ -2388,10 +1738,15 @@ CallInst *CodeExtractor::emitReplacerCall( NumExitBlocks > 1 ? "targetBlock" : "", codeReplacer); // Set swifterror parameter attributes. - if (!AggregateArgs) { - for (auto P : enumerate(inputs)) { - if (P.value()->isSwiftError()) - call->addParamAttr(P.index(), Attribute::SwiftError); + unsigned ParamIdx = 0; + unsigned AggIdx = 0; + for (auto input : inputs) { + if (StructValues.contains(input)) { + ++AggIdx; + } else { + if (input->isSwiftError()) + call->addParamAttr(ParamIdx, Attribute::SwiftError); + ++ParamIdx; } } @@ -2399,24 +1754,27 @@ CallInst *CodeExtractor::emitReplacerCall( // info. In that case, the terminator of the entry block of the extracted // function contains the first debug location of the extracted function, // set in extractCodeRegion. - if (oldFunction->getSubprogram()) { + if (codeReplacer->getParent()->getSubprogram()) { if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) call->setDebugLoc(DL); } - // Reload the outputs passed in by reference. - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + // Reload the outputs passed in by reference, use the struct if output is in + // the aggregate or reload from the scalar argument. 
+ for (unsigned i = 0, e = outputs.size(), scalarIdx = 0; i != e; ++i) { Value *Output = nullptr; - if (AggregateArgs) { + if (StructValues.contains(outputs[i])) { Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), AggIdx); GetElementPtrInst *GEP = GetElementPtrInst::Create( StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; + ++AggIdx; } else { - Output = ReloadOutputs[i]; + Output = ReloadOutputs[scalarIdx]; + ++scalarIdx; } LoadInst *load = new LoadInst(outputs[i]->getType(), Output, @@ -2466,8 +1824,9 @@ CallInst *CodeExtractor::emitReplacerCall( TheSwitch->eraseFromParent(); break; case 2: - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), - call, TheSwitch); + BranchInst::Create( + TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), call, + TheSwitch); // MK: tight order (branch is switched if boolean) TheSwitch->eraseFromParent(); break; default: @@ -2486,7 +1845,8 @@ CallInst *CodeExtractor::emitReplacerCall( // Replicate the effects of any lifetime start/end markers which referenced // input objects in the extraction region by placing markers around the call. 
- insertLifetimeMarkersSurroundingCall(M, LifetimesStart, {}, call); + insertLifetimeMarkersSurroundingCall(oldFunction->getParent(), LifetimesStart, + {}, call); return call; } @@ -2504,7 +1864,8 @@ void CodeExtractor::insertReplacerCall( // The BasicBlock which contains the branch is not in the region // modify the branch target to a new block if (Instruction *I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction) + if (I->isTerminator() && I->getFunction() == oldFunction && + !Blocks.count(I->getParent())) I->replaceUsesOfWith(header, codeReplacer); // When moving the code region it is sufficient to replace all uses to the diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp index c142729e2c6f4..b0294ef8a3be6 100644 --- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -513,19 +513,28 @@ TEST(CodeExtractor, PartialAggregateArgs) { target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" - declare void @use(i32) + ; use different types such that an index mismatch will result in a type mismatch during verification. 
+ declare void @use16(i16) + declare void @use32(i32) + declare void @use64(i64) - define void @foo(i32 %a, i32 %b, i32 %c) { + define void @foo(i16 %a, i32 %b, i64 %c) { entry: br label %extract extract: - call void @use(i32 %a) - call void @use(i32 %b) - call void @use(i32 %c) + call void @use16(i16 %a) + call void @use32(i32 %b) + call void @use64(i64 %c) + %d = add i16 21, 21 + %e = add i32 21, 21 + %f = add i64 21, 21 br label %exit exit: + call void @use16(i16 %d) + call void @use32(i32 %e) + call void @use64(i64 %f) ret void } )ir", @@ -544,14 +553,15 @@ TEST(CodeExtractor, PartialAggregateArgs) { BasicBlock *CommonExit = nullptr; CE.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); CE.findInputsOutputs(Inputs, Outputs, SinkingCands); - // Exclude the first input from the argument aggregate. - CE.excludeArgFromAggregate(Inputs[0]); + // Exclude the middle input and output from the argument aggregate. + CE.excludeArgFromAggregate(Inputs[1]); + CE.excludeArgFromAggregate(Outputs[1]); Function *Outlined = CE.extractCodeRegion(CEAC, Inputs, Outputs); EXPECT_TRUE(Outlined); - // Expect 2 arguments in the outlined function: the excluded input and the - // struct aggregate for the remaining inputs. - EXPECT_EQ(Outlined->arg_size(), 2U); + // Expect 3 arguments in the outlined function: the excluded input, the + // excluded output, and the struct aggregate for the remaining inputs. 
+ EXPECT_EQ(Outlined->arg_size(), 3U); EXPECT_FALSE(verifyFunction(*Outlined)); EXPECT_FALSE(verifyFunction(*Func)); } From 974072d279f55eebe58180932ff5937b64a12950 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 31 Mar 2022 15:13:18 -0500 Subject: [PATCH 098/130] some simplification/comments --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 59c6c208375d5..077f8a142239f 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1559,12 +1559,9 @@ void CodeExtractor::emitFunctionBody( BasicBlock *OldTarget = P.value(); size_t SuccNum = P.index(); - BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - - // If we don't already have an exit stub for this non-extracted - // destination, create one now! - NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", - newFunction); + BasicBlock *NewTarget = BasicBlock::Create( + Context, OldTarget->getName() + ".exitStub", newFunction); + ExitBlockMap[OldTarget] = NewTarget; Value *brVal = nullptr; assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); @@ -1573,8 +1570,7 @@ void CodeExtractor::emitFunctionBody( case 1: break; // No value needed. case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), - !SuccNum); // MK: why the invert? + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); break; default: brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); @@ -1824,9 +1820,11 @@ CallInst *CodeExtractor::emitReplacerCall( TheSwitch->eraseFromParent(); break; case 2: - BranchInst::Create( - TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), call, - TheSwitch); // MK: tight order (branch is switched if boolean) + // Only two destinations, convert to a condition branch. 
+ // Remark: This also swaps the target branches: + // 0 -> false -> getSuccessor(2); 1 -> true -> getSuccessor(1) + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); TheSwitch->eraseFromParent(); break; default: From d3a9fc9ae536e691eb0d34d9de9167cd95996537 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 31 Mar 2022 17:28:46 -0500 Subject: [PATCH 099/130] Fix after merge conflicts --- llvm/lib/Transforms/IPO/BlockExtractor.cpp | 16 +++++++++++----- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 16 +++++++--------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 408bb55235d7e..53ca44c36cd7a 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -230,11 +230,17 @@ bool BlockExtractor::runOnModule(Module &M) { } CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); Function *F = - CodeExtractor(BlocksToExtractVec, /* DT */ nullptr, - /* AggregateArgs*/ false, /* BFI */ nullptr, - /* BPI */ nullptr, /* AC */ nullptr, - /* AllowVarArgs */ false, /* AllowAlloca */ false, - /* Suffix */ "", KeepOldBlocks) + CodeExtractor(BlocksToExtractVec, + /* DT */ nullptr, + /* AggregateArgs*/ false, + /* BFI */ nullptr, + /* BPI */ nullptr, + /* AC */ nullptr, + /* AllowVarArgs */ false, + /* AllowAlloca */ false, + /* AllocationBlock */ nullptr, + /* Suffix */ "", + KeepOldBlocks) .extractCodeRegion(CEAC); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 35e5d2e5f3136..bcbc0ebe779c7 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1701,11 +1701,11 @@ void CodeExtractor::emitFunctionBody( BasicBlock *NewTarget = ExitBlockMap[OldTarget]; assert(NewTarget && "Unknown target 
block!"); - if (!KeepOldBlocks) { + if (KeepOldBlocks) { + VMap[OldTarget] = NewTarget; + } else { // rewrite the original branch instruction with this new target TI->setSuccessor(i, NewTarget); - } else { - VMap[OldTarget] = NewTarget; } } } @@ -1756,11 +1756,6 @@ void CodeExtractor::emitFunctionBody( // result restore will be placed in the outlined function. ScalarAI = newFunction->arg_begin(); unsigned AggIdx = 0; - - if (KeepOldBlocks) - OutI = cast(VMap.lookup(OutI)); - - for (Value *Input : inputs) { if (StructValues.contains(Input)) ++AggIdx; @@ -1768,7 +1763,10 @@ void CodeExtractor::emitFunctionBody( ++ScalarAI; } - for (Value *Output : outputs) { + for (Value* Output : outputs) { + if (KeepOldBlocks) + Output = VMap.lookup(Output); + // Find proper insertion point. // In case Output is an invoke, we insert the store at the beginning in the // 'normal destination' BB. Otherwise we insert the store right after From bb0043a45696ae4bbc92c0b7820ef5e518e0d94f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 31 Mar 2022 17:31:18 -0500 Subject: [PATCH 100/130] clang-format --- .../llvm/Transforms/Utils/CodeExtractor.h | 3 +-- llvm/lib/Transforms/IPO/BlockExtractor.cpp | 24 +++++++++---------- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 14 ++++++----- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 17660f9d4dd08..472242257d670 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -147,8 +147,7 @@ class CodeExtractorAnalysisCache { AssumptionCache *AC = nullptr, bool AllowVarArgs = false, bool AllowAlloca = false, BasicBlock *AllocationBlock = nullptr, - std::string Suffix = "", - bool KeepOldBlocks = false); + std::string Suffix = "", bool KeepOldBlocks = false); /// Create a code extractor for a loop body. 
/// diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 53ca44c36cd7a..6e2b9f04f2dbc 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -229,19 +229,17 @@ bool BlockExtractor::runOnModule(Module &M) { Changed = true; } CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); - Function *F = - CodeExtractor(BlocksToExtractVec, - /* DT */ nullptr, - /* AggregateArgs*/ false, - /* BFI */ nullptr, - /* BPI */ nullptr, - /* AC */ nullptr, - /* AllowVarArgs */ false, - /* AllowAlloca */ false, - /* AllocationBlock */ nullptr, - /* Suffix */ "", - KeepOldBlocks) - .extractCodeRegion(CEAC); + Function *F = CodeExtractor(BlocksToExtractVec, + /* DT */ nullptr, + /* AggregateArgs*/ false, + /* BFI */ nullptr, + /* BPI */ nullptr, + /* AC */ nullptr, + /* AllowVarArgs */ false, + /* AllowAlloca */ false, + /* AllocationBlock */ nullptr, + /* Suffix */ "", KeepOldBlocks) + .extractCodeRegion(CEAC); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() << "' in: " << F->getName() << '\n'); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index bcbc0ebe779c7..cd8c34a18748e 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -253,7 +253,8 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, bool AllowVarArgs, bool AllowAlloca, - BasicBlock *AllocationBlock, std::string Suffix, bool KeepOldBlocks) + BasicBlock *AllocationBlock, std::string Suffix, + bool KeepOldBlocks) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AC(AC), AllocationBlock(AllocationBlock), AllowVarArgs(AllowVarArgs), KeepOldBlocks(KeepOldBlocks), @@ -266,7 +267,8 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool 
AggregateArgs, BranchProbabilityInfo *BPI, AssumptionCache *AC, std::string Suffix) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllocationBlock(nullptr), AllowVarArgs(false), KeepOldBlocks(false), + BPI(BPI), AC(AC), AllocationBlock(nullptr), AllowVarArgs(false), + KeepOldBlocks(false), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, /* AllowVarArgs */ false, /* AllowAlloca */ false, @@ -1702,7 +1704,7 @@ void CodeExtractor::emitFunctionBody( assert(NewTarget && "Unknown target block!"); if (KeepOldBlocks) { - VMap[OldTarget] = NewTarget; + VMap[OldTarget] = NewTarget; } else { // rewrite the original branch instruction with this new target TI->setSuccessor(i, NewTarget); @@ -1763,9 +1765,9 @@ void CodeExtractor::emitFunctionBody( ++ScalarAI; } - for (Value* Output : outputs) { - if (KeepOldBlocks) - Output = VMap.lookup(Output); + for (Value *Output : outputs) { + if (KeepOldBlocks) + Output = VMap.lookup(Output); // Find proper insertion point. // In case Output is an invoke, we insert the store at the beginning in the From d59c976ba198bdf93e6632e3d6daf26462e378be Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 12 Apr 2022 23:37:32 -0500 Subject: [PATCH 101/130] Address review --- llvm/include/llvm/Transforms/Utils/Cloning.h | 4 ++-- llvm/tools/llvm-extract/llvm-extract.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 415c4f1b50947..8a733807f249b 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -114,8 +114,8 @@ struct ClonedCodeInfo { /// parameter. /// /// If you would like to clone only a subset of instructions in the basic block, -/// you can specify a callback returning true only for those instructions that -/// are to be cloned. 
+/// you can specify a callback that returns true only for those instructions +/// that are to be cloned with the optional seventh parameter. BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix = "", Function *F = nullptr, diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index 7df251179111d..fac057e49b22c 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -85,7 +85,7 @@ static cl::list ExtractBlocks( "Specify pairs to extract.\n" "Each pair will create a function.\n" "If multiple basic blocks are specified in one pair,\n" - "the first block in the sequence should dominate the rest (Unlsess " + "the first block in the sequence should dominate the rest (Unless " "using --bb-keep-blocks).\n" "eg:\n" " --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n" From ead347b114a86c759541f5f099d8a2181064fe8f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 20 Apr 2022 15:53:06 -0500 Subject: [PATCH 102/130] Introduce --replace-with-call to replace --bb-keep-functions and --bb-keep-blocks. Make the interface easier and --bb-keep-functions alone can result in invalid IR.
--- .../llvm-extract/extract-block-cleanup.ll | 2 +- .../extract-block-multiple-exits.ll | 2 +- .../tools/llvm-extract/extract-block-sink.ll | 2 +- llvm/test/tools/llvm-extract/extract-block.ll | 4 ++-- .../extract-blocks-with-groups.ll | 4 ++-- llvm/tools/llvm-extract/llvm-extract.cpp | 19 ++++++++----------- 6 files changed, 15 insertions(+), 18 deletions(-) diff --git a/llvm/test/tools/llvm-extract/extract-block-cleanup.ll b/llvm/test/tools/llvm-extract/extract-block-cleanup.ll index bbf656fe696f6..3c2181c249640 100644 --- a/llvm/test/tools/llvm-extract/extract-block-cleanup.ll +++ b/llvm/test/tools/llvm-extract/extract-block-cleanup.ll @@ -1,4 +1,4 @@ -; RUN: llvm-extract -S -bb "foo:region_start;extractonly;cleanup;fallback;region_end" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s +; RUN: llvm-extract -S -bb "foo:region_start;extractonly;cleanup;fallback;region_end" --replace-with-call %s | FileCheck %s ; CHECK-LABEL: define void @foo(i32* %arg, i1 %c) { diff --git a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll index b4c0667b9a58d..90799593237b6 100644 --- a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll +++ b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll @@ -1,4 +1,4 @@ -; RUN: llvm-extract -S -bb "func:region_start;exiting0;exiting1" %s --bb-keep-functions --bb-keep-blocks | FileCheck %s +; RUN: llvm-extract -S -bb "func:region_start;exiting0;exiting1" --replace-with-call %s | FileCheck %s ; CHECK-LABEL: define void @func(i32* %arg, i1 %c0, i1 %c1, i1 %c2, i8 %dest) { diff --git a/llvm/test/tools/llvm-extract/extract-block-sink.ll b/llvm/test/tools/llvm-extract/extract-block-sink.ll index 2bf743a718c07..c6f33757b820a 100644 --- a/llvm/test/tools/llvm-extract/extract-block-sink.ll +++ b/llvm/test/tools/llvm-extract/extract-block-sink.ll @@ -1,4 +1,4 @@ -; RUN: llvm-extract -S -bb "foo:region_start" %s --bb-keep-functions --bb-keep-blocks | 
FileCheck %s +; RUN: llvm-extract -S -bb "foo:region_start" --replace-with-call %s | FileCheck %s diff --git a/llvm/test/tools/llvm-extract/extract-block.ll b/llvm/test/tools/llvm-extract/extract-block.ll index 4849e1e917783..fd1554f21240b 100644 --- a/llvm/test/tools/llvm-extract/extract-block.ll +++ b/llvm/test/tools/llvm-extract/extract-block.ll @@ -1,5 +1,5 @@ -; RUN: llvm-extract -S -bb foo:bb4 %s | FileCheck %s --check-prefixes=CHECK,KILL -; RUN: llvm-extract -S -bb foo:bb4 %s --bb-keep-functions --bb-keep-blocks | FileCheck %s --check-prefixes=CHECK,KEEP +; RUN: llvm-extract -S -bb foo:bb4 %s | FileCheck %s --check-prefixes=CHECK,KILL +; RUN: llvm-extract -S -bb foo:bb4 --replace-with-call %s | FileCheck %s --check-prefixes=CHECK,KEEP ; CHECK: declare void @bar() diff --git a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll index eda65dc6269df..387afbfc42795 100644 --- a/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll +++ b/llvm/test/tools/llvm-extract/extract-blocks-with-groups.ll @@ -1,5 +1,5 @@ -; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s | FileCheck %s --check-prefixes=CHECK,KILL -; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s --bb-keep-functions --bb-keep-blocks | FileCheck %s --check-prefixes=CHECK,KEEP +; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' -S %s | FileCheck %s --check-prefixes=CHECK,KILL +; RUN: llvm-extract -bb 'foo:if;then;else' -bb 'bar:bb14;bb20' --replace-with-call -S %s | FileCheck %s --check-prefixes=CHECK,KEEP ; Extract two groups of basic blocks in two different functions. 
diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index fac057e49b22c..298a4e88f136c 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -92,22 +92,18 @@ static cl::list ExtractBlocks( " --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one " "with bb2."), cl::ZeroOrMore, cl::value_desc("function:bb1[;bb2...]"), - cl::cat(ExtractCat)); + cl::cat(ExtractCat)) + ; + -static cl::opt KeepFunctions( - "bb-keep-functions", +static cl::opt ReplaceWithCall( + "replace-with-call", cl::desc( "When extracting blocks from functions, keep the original functions; " "extracted code is replaced by function call to new function"), cl::cat(ExtractCat)); -static cl::opt KeepBlocks( - "bb-keep-blocks", - cl::desc("Keep extracted blocks in original function after outlining. This " - "permits branches to any selected basic block from outside the " - "selection and overlapping code regions, but only branches to the " - "first in the group will call the extracted function."), - cl::cat(ExtractCat)); + // ExtractAlias - The alias to extract from the module. static cl::list @@ -375,7 +371,8 @@ int main(int argc, char **argv) { } legacy::PassManager PM; - PM.add(createBlockExtractorPass(GroupOfBBs, !KeepFunctions, KeepBlocks)); + PM.add(createBlockExtractorPass(GroupOfBBs, !ReplaceWithCall, ReplaceWithCall)); + // TODO: Remove BBs from original function that have become dead. 
PM.run(*M); } From 7d5bbda395551ec5718f47ae4d468cf12d9bfa6a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 10 Nov 2022 20:47:42 -0600 Subject: [PATCH 103/130] Avoid deprecation warnings --- llvm/include/llvm/IR/LegacyPassManagers.h | 2 ++ llvm/lib/Transforms/Utils/CodeExtractor.cpp | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/IR/LegacyPassManagers.h b/llvm/include/llvm/IR/LegacyPassManagers.h index 41c11d26aa456..95948d6f1b3ce 100644 --- a/llvm/include/llvm/IR/LegacyPassManagers.h +++ b/llvm/include/llvm/IR/LegacyPassManagers.h @@ -156,6 +156,8 @@ class PMStack { /// PMTopLevelManager manages LastUser info and collects common APIs used by /// top level pass managers. class PMTopLevelManager { + llvm::DenseMap,Pass*> AnalysisImpls; + protected: explicit PMTopLevelManager(PMDataManager *PMDM); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index e75f0d5e2ca11..59c37b16dd61b 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1031,9 +1031,9 @@ Function *CodeExtractor::constructFunctionDeclaration( // Update the entry count of the function. 
if (BFI) { auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (Count.hasValue()) + if (Count.has_value()) newFunction->setEntryCount( - ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME + ProfileCount(Count.value(), Function::PCT_Real)); // FIXME } return newFunction; From d1dc14f028d76facb63b0e467d091f097a637f98 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 10 Nov 2022 20:52:17 -0600 Subject: [PATCH 104/130] Use range-for --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 59c37b16dd61b..93ebd936efd27 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1899,8 +1899,8 @@ void CodeExtractor::insertReplacerCall( for (unsigned i = 0, e = outputs.size(); i != e; ++i) { Value *load = Reloads[i]; std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast(Users[u]); + for (User *U : Users) { + Instruction *inst = cast(U); if (inst->getParent()->getParent() == oldFunction) inst->replaceUsesOfWith(outputs[i], load); } From e2d1e2183a9615c669392eefcfe632cc0b59a649 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 10 Nov 2022 21:17:59 -0600 Subject: [PATCH 105/130] NumExitBlocks -> SwitchCases.size() --- llvm/include/llvm/IR/LegacyPassManagers.h | 2 -- .../llvm/Transforms/Utils/CodeExtractor.h | 3 +-- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 17 ++++++++--------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/IR/LegacyPassManagers.h b/llvm/include/llvm/IR/LegacyPassManagers.h index 95948d6f1b3ce..41c11d26aa456 100644 --- a/llvm/include/llvm/IR/LegacyPassManagers.h +++ b/llvm/include/llvm/IR/LegacyPassManagers.h @@ -156,8 +156,6 @@ class PMStack { /// PMTopLevelManager manages 
LastUser info and collects common APIs used by /// top level pass managers. class PMTopLevelManager { - llvm::DenseMap,Pass*> AnalysisImpls; - protected: explicit PMTopLevelManager(PMDataManager *PMDM); diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index f4ea425a309e3..23906285ed2fb 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -103,7 +103,6 @@ class CodeExtractorAnalysisCache { // Bits of intermediate state computed at various phases of extraction. SetVector Blocks; - unsigned NumExitBlocks = std::numeric_limits::max(); Type *RetTy; /// Lists of blocks that are branched from the code region to be extracted. @@ -245,7 +244,7 @@ class CodeExtractorAnalysisCache { getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, Instruction *Addr, BasicBlock *ExitBlock) const; - /// Updates the list of exit blocks (OldTargets and ExitBlocks) after + /// Updates the list of SwitchCases (corresponding to exit blocks) after /// changes of the control flow or the Blocks list. void recomputeExitBlocks(); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index be340eea824c0..95d766e42bdd9 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -817,7 +817,7 @@ Function *CodeExtractor::constructFunctionDeclaration( Module *M = Blocks.front()->getModule(); // This function returns unsigned, outputs will go back by reference. 
- switch (NumExitBlocks) { + switch (SwitchCases.size()) { case 0: case 1: RetTy = Type::getVoidTy(Context); @@ -1484,7 +1484,6 @@ void CodeExtractor::recomputeExitBlocks() { SwitchCases.push_back(Succ); } } - NumExitBlocks = ExitBlocks.size(); } void CodeExtractor::emitFunctionBody( @@ -1569,8 +1568,8 @@ void CodeExtractor::emitFunctionBody( ExitBlockMap[OldTarget] = NewTarget; Value *brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { + assert(SwitchCases.size() < 0xffff && "too many exit blocks for switch"); + switch (SwitchCases.size()) { case 0: case 1: break; // No value needed. @@ -1736,7 +1735,7 @@ CallInst *CodeExtractor::emitReplacerCall( // Emit the call to the function CallInst *call = CallInst::Create(newFunction, params, - NumExitBlocks > 1 ? "targetBlock" : "", codeReplacer); + SwitchCases.size() > 1 ? "targetBlock" : "", codeReplacer); // Set swifterror parameter attributes. unsigned ParamIdx = 0; @@ -1797,7 +1796,7 @@ CallInst *CodeExtractor::emitReplacerCall( // Now that we've done the deed, simplify the switch instruction. Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); - switch (NumExitBlocks) { + switch (SwitchCases.size()) { case 0: // There are no successors (the block containing the switch itself), which // means that previously this was the last part of the function, and hence @@ -1836,9 +1835,9 @@ CallInst *CodeExtractor::emitReplacerCall( // Otherwise, make the default destination of the switch instruction be one // of the other successors. 
TheSwitch->setCondition(call); - TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(SwitchCases.size())); // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks - 1)); + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, SwitchCases.size() - 1)); break; } @@ -1904,7 +1903,7 @@ void CodeExtractor::insertReplacerCall( } // Update the branch weights for the exit block. - if (BFI && NumExitBlocks > 1) + if (BFI && SwitchCases.size() > 1) calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); } From 179252fd3f3fe161abce055fefeaf18df8a1d8ce Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 10 Nov 2022 21:31:10 -0600 Subject: [PATCH 106/130] clang-format --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 95d766e42bdd9..3ed0bd5030ed5 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1733,9 +1733,9 @@ CallInst *CodeExtractor::emitReplacerCall( } // Emit the call to the function - CallInst *call = - CallInst::Create(newFunction, params, - SwitchCases.size() > 1 ? "targetBlock" : "", codeReplacer); + CallInst *call = CallInst::Create(newFunction, params, + SwitchCases.size() > 1 ? "targetBlock" : "", + codeReplacer); // Set swifterror parameter attributes. 
unsigned ParamIdx = 0; @@ -1837,7 +1837,8 @@ CallInst *CodeExtractor::emitReplacerCall( TheSwitch->setCondition(call); TheSwitch->setDefaultDest(TheSwitch->getSuccessor(SwitchCases.size())); // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, SwitchCases.size() - 1)); + TheSwitch->removeCase( + SwitchInst::CaseIt(TheSwitch, SwitchCases.size() - 1)); break; } From f1ef796612966bb98333435691f97dd8fd57e2a6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 10 Nov 2022 22:59:20 -0600 Subject: [PATCH 107/130] Add alloca block test --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 4 +- .../Transforms/Utils/CodeExtractorTest.cpp | 61 +++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 3ed0bd5030ed5..2546bf8b30312 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1701,7 +1701,9 @@ CallInst *CodeExtractor::emitReplacerCall( for (Value *output : outputs) { if (StructValues.contains(output)) continue; - + + + AllocaInst *alloca = new AllocaInst( output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc", &*AllocaBlock->getFirstInsertionPt()); diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp index b0294ef8a3be6..d3bbd88c00a35 100644 --- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" @@ -30,6 +31,15 @@ BasicBlock *getBlockByName(Function *F, StringRef name) { return nullptr; } +Instruction *getInstByName(Function *F, StringRef Name) { + for (Instruction &I : 
instructions(F)) + if (I.getName() == Name) + return &I; + return nullptr; +} + + + TEST(CodeExtractor, ExitStub) { LLVMContext Ctx; SMDiagnostic Err; @@ -565,4 +575,55 @@ TEST(CodeExtractor, PartialAggregateArgs) { EXPECT_FALSE(verifyFunction(*Outlined)); EXPECT_FALSE(verifyFunction(*Func)); } + +TEST(CodeExtractor, AllocaBlock) { + LLVMContext Ctx; + SMDiagnostic Err; + std::unique_ptr M(parseAssemblyString(R"invalid( + define i32 @foo(i32 %x, i32 %y, i32 %z) { + entry: + br label %allocas + + allocas: + br label %body + + body: + %w = add i32 %x, %y + br label %notExtracted + + notExtracted: + %r = add i32 %w, %x + ret i32 %r + } + )invalid", + Err, Ctx)); + + Function *Func = M->getFunction("foo"); + SmallVector Candidates{getBlockByName(Func, "body")}; + + BasicBlock *AllocaBlock = getBlockByName(Func, "allocas"); + CodeExtractor CE(Candidates,nullptr,true,nullptr,nullptr,nullptr,false,false,AllocaBlock ); + CE.excludeArgFromAggregate(Func->getArg(0)); + CE.excludeArgFromAggregate(getInstByName(Func, "w")); + EXPECT_TRUE(CE.isEligible()); + + CodeExtractorAnalysisCache CEAC(*Func); + SetVector Inputs, Outputs; + Function *Outlined = CE.extractCodeRegion(CEAC, Inputs, Outputs); + EXPECT_TRUE(Outlined); + EXPECT_FALSE(verifyFunction(*Outlined)); + EXPECT_FALSE(verifyFunction(*Func)); + + // The only added allocas may be in the dedicated alloca block. There should be one alloca for the struct, and another one for the reload value. 
+ int NumAllocas = 0; + for (Instruction &I: instructions(Func)) { + if (!isa(I)) continue ; + EXPECT_EQ( I.getParent() ,AllocaBlock); + NumAllocas += 1; + } + EXPECT_EQ(NumAllocas, 2); +} + + + } // end anonymous namespace From a80f7a6a19f26b0aead4c3d17429a71a8b456a2f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 10 Nov 2022 23:00:01 -0600 Subject: [PATCH 108/130] clang-format --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 4 +- .../Transforms/Utils/CodeExtractorTest.cpp | 75 +++++++++---------- 2 files changed, 38 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 2546bf8b30312..3ed0bd5030ed5 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1701,9 +1701,7 @@ CallInst *CodeExtractor::emitReplacerCall( for (Value *output : outputs) { if (StructValues.contains(output)) continue; - - - + AllocaInst *alloca = new AllocaInst( output->getType(), DL.getAllocaAddrSpace(), nullptr, output->getName() + ".loc", &*AllocaBlock->getFirstInsertionPt()); diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp index d3bbd88c00a35..1111a13798afa 100644 --- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -7,13 +7,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CodeExtractor.h" -#include "llvm/AsmParser/Parser.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/AsmParser/Parser.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" @@ -32,14 +32,12 @@ BasicBlock 
*getBlockByName(Function *F, StringRef name) { } Instruction *getInstByName(Function *F, StringRef Name) { - for (Instruction &I : instructions(F)) - if (I.getName() == Name) - return &I; - return nullptr; + for (Instruction &I : instructions(F)) + if (I.getName() == Name) + return &I; + return nullptr; } - - TEST(CodeExtractor, ExitStub) { LLVMContext Ctx; SMDiagnostic Err; @@ -577,9 +575,9 @@ TEST(CodeExtractor, PartialAggregateArgs) { } TEST(CodeExtractor, AllocaBlock) { - LLVMContext Ctx; - SMDiagnostic Err; - std::unique_ptr M(parseAssemblyString(R"invalid( + LLVMContext Ctx; + SMDiagnostic Err; + std::unique_ptr M(parseAssemblyString(R"invalid( define i32 @foo(i32 %x, i32 %y, i32 %z) { entry: br label %allocas @@ -596,34 +594,35 @@ TEST(CodeExtractor, AllocaBlock) { ret i32 %r } )invalid", - Err, Ctx)); - - Function *Func = M->getFunction("foo"); - SmallVector Candidates{getBlockByName(Func, "body")}; - - BasicBlock *AllocaBlock = getBlockByName(Func, "allocas"); - CodeExtractor CE(Candidates,nullptr,true,nullptr,nullptr,nullptr,false,false,AllocaBlock ); - CE.excludeArgFromAggregate(Func->getArg(0)); - CE.excludeArgFromAggregate(getInstByName(Func, "w")); - EXPECT_TRUE(CE.isEligible()); - - CodeExtractorAnalysisCache CEAC(*Func); - SetVector Inputs, Outputs; - Function *Outlined = CE.extractCodeRegion(CEAC, Inputs, Outputs); - EXPECT_TRUE(Outlined); - EXPECT_FALSE(verifyFunction(*Outlined)); - EXPECT_FALSE(verifyFunction(*Func)); - - // The only added allocas may be in the dedicated alloca block. There should be one alloca for the struct, and another one for the reload value. 
- int NumAllocas = 0; - for (Instruction &I: instructions(Func)) { - if (!isa(I)) continue ; - EXPECT_EQ( I.getParent() ,AllocaBlock); - NumAllocas += 1; - } - EXPECT_EQ(NumAllocas, 2); -} + Err, Ctx)); + Function *Func = M->getFunction("foo"); + SmallVector Candidates{getBlockByName(Func, "body")}; + + BasicBlock *AllocaBlock = getBlockByName(Func, "allocas"); + CodeExtractor CE(Candidates, nullptr, true, nullptr, nullptr, nullptr, false, + false, AllocaBlock); + CE.excludeArgFromAggregate(Func->getArg(0)); + CE.excludeArgFromAggregate(getInstByName(Func, "w")); + EXPECT_TRUE(CE.isEligible()); + + CodeExtractorAnalysisCache CEAC(*Func); + SetVector Inputs, Outputs; + Function *Outlined = CE.extractCodeRegion(CEAC, Inputs, Outputs); + EXPECT_TRUE(Outlined); + EXPECT_FALSE(verifyFunction(*Outlined)); + EXPECT_FALSE(verifyFunction(*Func)); + // The only added allocas may be in the dedicated alloca block. There should + // be one alloca for the struct, and another one for the reload value. 
+ int NumAllocas = 0; + for (Instruction &I : instructions(Func)) { + if (!isa(I)) + continue; + EXPECT_EQ(I.getParent(), AllocaBlock); + NumAllocas += 1; + } + EXPECT_EQ(NumAllocas, 2); +} } // end anonymous namespace From 853d6dbffaac5bd4767a92990586c8021eb38e73 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 10 Nov 2022 23:03:56 -0600 Subject: [PATCH 109/130] rename to recomputeSwitchCases --- llvm/include/llvm/Transforms/Utils/CodeExtractor.h | 2 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 23906285ed2fb..83cc7926b8f2d 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -246,7 +246,7 @@ class CodeExtractorAnalysisCache { /// Updates the list of SwitchCases (corresponding to exit blocks) after /// changes of the control flow or the Blocks list. - void recomputeExitBlocks(); + void recomputeSwitchCases(); void severSplitPHINodesOfEntry(BasicBlock *&Header); void severSplitPHINodesOfExits(); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 3ed0bd5030ed5..72be3bae31910 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1372,7 +1372,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, Instruction *TI = HoistToBlock->getTerminator(); for (auto *II : HoistingCands) cast(II)->moveBefore(TI); - recomputeExitBlocks(); + recomputeSwitchCases(); } // CFG/ExitBlocks must not change hereafter @@ -1466,11 +1466,11 @@ void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&header) { // region, create a new PHI for those values within the region such that only // PHI itself becomes an output value, not each of its incoming values // individually. 
- recomputeExitBlocks(); + recomputeSwitchCases(); severSplitPHINodesOfExits(); } -void CodeExtractor::recomputeExitBlocks() { +void CodeExtractor::recomputeSwitchCases() { SwitchCases.clear(); SmallPtrSet ExitBlocks; From 39963916b226298ada4a82c57d21e544243f7cee Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 10 Nov 2022 23:22:10 -0600 Subject: [PATCH 110/130] refactor getSwitchType --- .../llvm/Transforms/Utils/CodeExtractor.h | 5 ++- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 43 +++++++++++-------- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 83cc7926b8f2d..365b9c4eb6371 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -103,7 +103,7 @@ class CodeExtractorAnalysisCache { // Bits of intermediate state computed at various phases of extraction. SetVector Blocks; - Type *RetTy; + /// Lists of blocks that are branched from the code region to be extracted. /// Each block is contained at most once. Its order defines the return value @@ -248,6 +248,9 @@ class CodeExtractorAnalysisCache { /// changes of the control flow or the Blocks list. void recomputeSwitchCases(); + /// Return the type used for the return code of the extracted function to indicate which exit block to jump to. 
+ Type * getSwitchType(); + void severSplitPHINodesOfEntry(BasicBlock *&Header); void severSplitPHINodesOfExits(); void splitReturnBlocks(); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 72be3bae31910..fc6dcf347233e 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -813,22 +813,11 @@ Function *CodeExtractor::constructFunctionDeclaration( LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); Function *oldFunction = Blocks.front()->getParent(); - LLVMContext &Context = oldFunction->getContext(); Module *M = Blocks.front()->getModule(); - // This function returns unsigned, outputs will go back by reference. - switch (SwitchCases.size()) { - case 0: - case 1: - RetTy = Type::getVoidTy(Context); - break; - case 2: - RetTy = Type::getInt1Ty(Context); - break; - default: - RetTy = Type::getInt16Ty(Context); - break; - } + + + // Assemble the function's parameter lists. 
std::vector ParamTy; @@ -869,6 +858,7 @@ Function *CodeExtractor::constructFunctionDeclaration( ParamTy.push_back(PointerType::get(StructTy, DL.getAllocaAddrSpace())); } + Type *RetTy = getSwitchType(); LLVM_DEBUG({ dbgs() << "Function type: " << *RetTy << " f("; for (Type *i : ParamTy) @@ -1486,6 +1476,23 @@ void CodeExtractor::recomputeSwitchCases() { } } +Type *CodeExtractor:: getSwitchType() { + LLVMContext &Context = Blocks.front()->getContext(); + + assert(SwitchCases.size() < 0xffff && "too many exit blocks for switch"); + switch (SwitchCases.size()) { + case 0: + case 1: + return Type::getVoidTy(Context); + case 2: + // Conditional branch, return a bool + return Type::getInt1Ty(Context); + default: + return Type::getInt16Ty(Context); + } +} + + void CodeExtractor::emitFunctionBody( const ValueSet &inputs, const ValueSet &outputs, const ValueSet &StructValues, Function *newFunction, @@ -1568,16 +1575,18 @@ void CodeExtractor::emitFunctionBody( ExitBlockMap[OldTarget] = NewTarget; Value *brVal = nullptr; + Type *RetTy = getSwitchType(); assert(SwitchCases.size() < 0xffff && "too many exit blocks for switch"); switch (SwitchCases.size()) { case 0: case 1: - break; // No value needed. + // No value needed. 
+ break; case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + brVal = ConstantInt::get(RetTy, !SuccNum); break; default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + brVal = ConstantInt::get(RetTy, SuccNum); break; } From b878fe0669bfa98d9ec776c1b29a8acd924c0d65 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 10 Nov 2022 23:24:26 -0600 Subject: [PATCH 111/130] clang-format --- .../llvm/Transforms/Utils/CodeExtractor.h | 6 ++-- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 35 ++++++++----------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 365b9c4eb6371..0b75639646ad6 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -104,7 +104,6 @@ class CodeExtractorAnalysisCache { // Bits of intermediate state computed at various phases of extraction. SetVector Blocks; - /// Lists of blocks that are branched from the code region to be extracted. /// Each block is contained at most once. Its order defines the return value /// of the extracted function, when leaving the extracted function via the @@ -248,8 +247,9 @@ class CodeExtractorAnalysisCache { /// changes of the control flow or the Blocks list. void recomputeSwitchCases(); - /// Return the type used for the return code of the extracted function to indicate which exit block to jump to. - Type * getSwitchType(); + /// Return the type used for the return code of the extracted function to + /// indicate which exit block to jump to. 
+ Type *getSwitchType(); void severSplitPHINodesOfEntry(BasicBlock *&Header); void severSplitPHINodesOfExits(); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index fc6dcf347233e..3d9a4fe2bcbfa 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -815,10 +815,6 @@ Function *CodeExtractor::constructFunctionDeclaration( Function *oldFunction = Blocks.front()->getParent(); Module *M = Blocks.front()->getModule(); - - - - // Assemble the function's parameter lists. std::vector ParamTy; std::vector AggParamTy; @@ -1476,23 +1472,22 @@ void CodeExtractor::recomputeSwitchCases() { } } -Type *CodeExtractor:: getSwitchType() { - LLVMContext &Context = Blocks.front()->getContext(); +Type *CodeExtractor::getSwitchType() { + LLVMContext &Context = Blocks.front()->getContext(); - assert(SwitchCases.size() < 0xffff && "too many exit blocks for switch"); - switch (SwitchCases.size()) { - case 0: - case 1: - return Type::getVoidTy(Context); - case 2: - // Conditional branch, return a bool - return Type::getInt1Ty(Context); - default: - return Type::getInt16Ty(Context); - } + assert(SwitchCases.size() < 0xffff && "too many exit blocks for switch"); + switch (SwitchCases.size()) { + case 0: + case 1: + return Type::getVoidTy(Context); + case 2: + // Conditional branch, return a bool + return Type::getInt1Ty(Context); + default: + return Type::getInt16Ty(Context); + } } - void CodeExtractor::emitFunctionBody( const ValueSet &inputs, const ValueSet &outputs, const ValueSet &StructValues, Function *newFunction, @@ -1580,9 +1575,9 @@ void CodeExtractor::emitFunctionBody( switch (SwitchCases.size()) { case 0: case 1: - // No value needed. + // No value needed. 
break; - case 2: // Conditional branch, return a bool + case 2: // Conditional branch, return a bool brVal = ConstantInt::get(RetTy, !SuccNum); break; default: From d79fe0e6ccd445863220db674c427693f63ef1d2 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 1 Nov 2024 15:54:28 +0100 Subject: [PATCH 112/130] Backport applied merge --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 1317d69d6cd44..e8a48171a1393 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1742,7 +1742,7 @@ CallInst *CodeExtractor::emitReplacerCall( Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), AggIdx); GetElementPtrInst *GEP = GetElementPtrInst::Create( StructArgTy, Struct, Idx, "gep_" + input->getName()); - codeReplacer->getInstList().push_back(GEP); + GEP->insertAt(codeReplacer, codeReplacer->end()); new StoreInst(input, GEP, codeReplacer); ++AggIdx; @@ -1786,7 +1786,7 @@ CallInst *CodeExtractor::emitReplacerCall( Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), AggIdx); GetElementPtrInst *GEP = GetElementPtrInst::Create( StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); + GEP->insertAt(codeReplacer, codeReplacer->end()); Output = GEP; ++AggIdx; } else { From 67acce77313f9902ec31c30473363269ae537bfb Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 1 Nov 2024 17:37:54 +0100 Subject: [PATCH 113/130] apply insertAt->insertInto --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index cae68f2926b50..3a012e24a4a05 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1739,7 +1739,7 @@ 
CallInst *CodeExtractor::emitReplacerCall( Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), AggIdx); GetElementPtrInst *GEP = GetElementPtrInst::Create( StructArgTy, Struct, Idx, "gep_" + input->getName()); - GEP->insertAt(codeReplacer, codeReplacer->end()); + GEP->insertInto(codeReplacer, codeReplacer->end()); new StoreInst(input, GEP, codeReplacer); ++AggIdx; @@ -1783,7 +1783,7 @@ CallInst *CodeExtractor::emitReplacerCall( Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), AggIdx); GetElementPtrInst *GEP = GetElementPtrInst::Create( StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - GEP->insertAt(codeReplacer, codeReplacer->end()); + GEP->insertInto(codeReplacer, codeReplacer->end()); Output = GEP; ++AggIdx; } else { From 22fa0b334d86e353eefce1cdee55544b2ad3ab68 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 1 Nov 2024 17:50:30 +0100 Subject: [PATCH 114/130] Re-apply last merge changes --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 3a012e24a4a05..045544ea1ca91 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1015,7 +1015,7 @@ Function *CodeExtractor::constructFunctionDeclaration( auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); if (Count.has_value()) newFunction->setEntryCount( - ProfileCount(Count.value(), Function::PCT_Real)); // FIXME + ProfileCount(*Count, Function::PCT_Real)); // FIXME } return newFunction; From 34cdd8ce606757c41b2d10467c435ef0ec6bf8fa Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 1 Nov 2024 23:37:07 +0100 Subject: [PATCH 115/130] merge fix --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp 
index 931c7c925ed26..7c728fc9592ee 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1108,14 +1108,6 @@ static void insertLifetimeMarkersSurroundingCall( } } - if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) { - auto *StructSpaceCast = new AddrSpaceCastInst( - Struct, PointerType ::get(Context, 0), "structArg.ascast"); - StructSpaceCast->insertAfter(Struct); - params.push_back(StructSpaceCast); - } else { - params.push_back(Struct); - } void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function *oldFunc = Blocks.front()->getParent(); auto newFuncIt = newFunction->begin(); @@ -1732,7 +1724,14 @@ CallInst *CodeExtractor::emitReplacerCall( if (!StructValues.empty()) { Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg", &*AllocaBlock->getFirstInsertionPt()); - params.push_back(Struct); + if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) { + auto *StructSpaceCast = new AddrSpaceCastInst( + Struct, PointerType ::get(Context, 0), "structArg.ascast"); + StructSpaceCast->insertAfter(Struct); + params.push_back(StructSpaceCast); + } else { + params.push_back(Struct); + } unsigned AggIdx = 0; for (Value *input : inputs) { From fc5ea7e7b4d940184988e3a8ad25a606cdfc0156 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 2 Nov 2024 02:50:24 +0100 Subject: [PATCH 116/130] Insert instructions using iterators --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 64 +++++++++++---------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index f68c06981b5ba..726c6eb234e59 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1691,37 +1691,41 @@ void CodeExtractor::emitFunctionBody( // In case Output is an invoke, we insert the store at the beginning in the // 'normal destination' BB. 
Otherwise we insert the store right after // Output. - Instruction *InsertBefore = nullptr; + BasicBlock::iterator InsertPt; if (auto *InvokeI = dyn_cast(Output)) - InsertBefore = &*InvokeI->getNormalDest()->getFirstInsertionPt(); + InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); else if (auto *Phi = dyn_cast(Output)) - InsertBefore = &*Phi->getParent()->getFirstInsertionPt(); + InsertPt = Phi->getParent()->getFirstInsertionPt(); else if (auto *OutI = dyn_cast(Output)) - InsertBefore = &*std::next(OutI->getIterator()); + InsertPt = std::next(OutI->getIterator()); + else { + // Globals don't need to be updated, just advance to the next argument. + if (StructValues.contains(Output)) + ++AggIdx; + else + ++ScalarAI; + continue; + } - assert((!InsertBefore || InsertBefore->getFunction() == newFunction || - Blocks.count(InsertBefore->getParent())) && + assert((InsertPt->getFunction() == newFunction || + Blocks.count(InsertPt->getParent())) && "InsertPt should be in new function"); if (StructValues.contains(Output)) { - if (InsertBefore) { - assert(AggArg && "Number of aggregate output arguments should match " - "the number of defined values"); - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), AggIdx); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, AggArg, Idx, "gep_" + Output->getName(), InsertBefore); - new StoreInst(Output, GEP, InsertBefore); - } + assert(AggArg && "Number of aggregate output arguments should match " + "the number of defined values"); + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), AggIdx); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, AggArg, Idx, "gep_" + Output->getName(), InsertPt); + new StoreInst(Output, GEP, InsertPt); ++AggIdx; } else { - if (InsertBefore) { - assert(ScalarAI != newFunction->arg_end() && - "Number of 
scalar output arguments should match " - "the number of defined values"); - new StoreInst(Output, &*ScalarAI, InsertBefore); - } + assert(ScalarAI != newFunction->arg_end() && + "Number of scalar output arguments should match " + "the number of defined values"); + new StoreInst(Output, &*ScalarAI, InsertPt); ++ScalarAI; } } @@ -1767,7 +1771,7 @@ CallInst *CodeExtractor::emitReplacerCall( AllocaInst *alloca = new AllocaInst( output->getType(), DL.getAllocaAddrSpace(), nullptr, - output->getName() + ".loc", &*AllocaBlock->getFirstInsertionPt()); + output->getName() + ".loc", AllocaBlock->getFirstInsertionPt()); params.push_back(alloca); ReloadOutputs.push_back(alloca); } @@ -1775,7 +1779,7 @@ CallInst *CodeExtractor::emitReplacerCall( AllocaInst *Struct = nullptr; if (!StructValues.empty()) { Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", &*AllocaBlock->getFirstInsertionPt()); + "structArg", AllocaBlock->getFirstInsertionPt()); if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) { auto *StructSpaceCast = new AddrSpaceCastInst( Struct, PointerType ::get(Context, 0), "structArg.ascast"); @@ -1874,15 +1878,17 @@ CallInst *CodeExtractor::emitReplacerCall( // Check if the function should return a value if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, nullptr, TheSwitch); // Return void + ReturnInst::Create(Context, nullptr, + TheSwitch->getIterator()); // Return void } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { // return what we have - ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + ReturnInst::Create(Context, TheSwitch->getCondition(), + TheSwitch->getIterator()); } else { // Otherwise we must have code extracted an unwind or something, just // return whatever we want. 
ReturnInst::Create(Context, Constant::getNullValue(OldFnRetTy), - TheSwitch); + TheSwitch->getIterator()); } TheSwitch->eraseFromParent(); @@ -1890,7 +1896,7 @@ CallInst *CodeExtractor::emitReplacerCall( case 1: // Only a single destination, change the switch into an unconditional // branch. - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getIterator()); TheSwitch->eraseFromParent(); break; case 2: @@ -1898,7 +1904,7 @@ CallInst *CodeExtractor::emitReplacerCall( // Remark: This also swaps the target branches: // 0 -> false -> getSuccessor(2); 1 -> true -> getSuccessor(1) BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), - call, TheSwitch); + call, TheSwitch->getIterator()); TheSwitch->eraseFromParent(); break; default: From b1b4fc1ae601a38d114f1a9834dc33a043bca762 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 2 Nov 2024 13:10:33 +0100 Subject: [PATCH 117/130] Re-apply --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 43 ++++++++------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index d4a6d4889ed63..9e58e8a1c251b 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -996,18 +996,6 @@ Function *CodeExtractor::constructFunctionDeclaration( newFunction->addFnAttr(Attr); } - if (NumExitBlocks == 0) { - // Mark the new function `noreturn` if applicable. Terminators which resume - // exception propagation are treated as returning instructions. This is to - // avoid inserting traps after calls to outlined functions which unwind. - if (none_of(Blocks, [](const BasicBlock *BB) { - const Instruction *Term = BB->getTerminator(); - return isa(Term) || isa(Term); - })) - newFunction->setDoesNotReturn(); - } - - // Create scalar and aggregate iterators to name all of the arguments we // inserted. 
Function::arg_iterator ScalarAI = newFunction->arg_begin(); @@ -1488,16 +1476,6 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall); - // Mark the new function `noreturn` if applicable. Terminators which resume - // exception propagation are treated as returning instructions. This is to - // avoid inserting traps after calls to outlined functions which unwind. - bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) { - const Instruction *Term = BB.getTerminator(); - return isa(Term) || isa(Term); - }); - if (doesNotReturn) - newFunction->setDoesNotReturn(); - LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) { newFunction->dump(); report_fatal_error("verification of newFunction failed!"); @@ -1742,6 +1720,17 @@ void CodeExtractor::emitFunctionBody( ++ScalarAI; } } + + if (SwitchCases.empty()) { + // Mark the new function `noreturn` if applicable. Terminators which resume + // exception propagation are treated as returning instructions. This is to + // avoid inserting traps after calls to outlined functions which unwind. + if (none_of(Blocks, [](const BasicBlock *BB) { + const Instruction *Term = BB->getTerminator(); + return isa(Term) || isa(Term); + })) + newFunction->setDoesNotReturn(); + } } CallInst *CodeExtractor::emitReplacerCall( @@ -1887,10 +1876,12 @@ CallInst *CodeExtractor::emitReplacerCall( case 0: // There are no successors (the block containing the switch itself), which // means that previously this was the last part of the function, and hence - // this should be rewritten as a `ret' - - // Check if the function should return a value - if (OldFnRetTy->isVoidTy()) { + // this should be rewritten as a `ret` or `unreachable`. + if (newFunction->doesNotReturn()) { + // If fn is no return, end with an unreachable terminator. 
+ (void)new UnreachableInst(Context, TheSwitch->getIterator()); + } else if (OldFnRetTy->isVoidTy()) { + // We have no return value. ReturnInst::Create(Context, nullptr, TheSwitch->getIterator()); // Return void } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { From cd3c85747ffe4904bb5bf6dc81a485ab641c184a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 2 Nov 2024 14:47:28 +0100 Subject: [PATCH 118/130] Remove unused variable --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index c444eff01699c..8dc4a2e3fd348 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1114,7 +1114,6 @@ static void insertLifetimeMarkersSurroundingCall( } void CodeExtractor::moveCodeToFunction(Function *newFunction) { - Function *oldFunc = Blocks.front()->getParent(); auto newFuncIt = newFunction->begin(); for (BasicBlock *Block : Blocks) { // Delete the basic block from the old function, and the list of blocks From 4530a043a023e00ff41259333322a2280166f626 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 2 Nov 2024 19:39:52 +0100 Subject: [PATCH 119/130] Re-apply foreach conversion --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 45f79ac930a5f..b71933406fca1 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -2078,8 +2078,8 @@ void CodeExtractor::insertReplacerCall( Value *load = Reloads[i]; std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast(Users[u]); + for (User *U : Users) { + Instruction *inst = cast(U); if (inst->getParent()->getParent() == 
oldFunction) inst->replaceUsesOfWith(outputs[i], load); } From c0b517618150a38c6b47759b07fcfd0fdede061c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 2 Nov 2024 19:46:52 +0100 Subject: [PATCH 120/130] Compile fix --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 24b015fd37edc..3b01efba33c5e 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1496,7 +1496,7 @@ void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&header) { } // Exit nodes may have changed by SplitEdge. - recomputeExitBlocks(); + recomputeSwitchCases(); } } From 530e42985aa66ca501488c16b0af10b47295da2e Mon Sep 17 00:00:00 2001 From: "U-BERGUFFLEN\\meinersbur" Date: Sat, 2 Nov 2024 23:50:03 +0100 Subject: [PATCH 121/130] Compile fix --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 14e082c98ce62..f8f59094ba0bb 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1574,7 +1574,7 @@ void CodeExtractor::emitFunctionBody( Instruction *ClonedI = I->clone(); ClonedI->setName(I->getName()); - newFuncRoot->getInstList().insert(IP, ClonedI); + ClonedI->insertInto(newFuncRoot, IP); AdditionalRemap.push_back(ClonedI); VMap[I] = ClonedI; return ClonedI; From de012958b233662f13bbcacd4db3210a4250b344 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 3 Nov 2024 02:56:01 +0100 Subject: [PATCH 122/130] Repair --replace-with-call for NPM --- llvm/include/llvm/Transforms/IPO/BlockExtractor.h | 3 ++- llvm/lib/Passes/PassRegistry.def | 2 +- llvm/lib/Transforms/IPO/BlockExtractor.cpp | 12 +++++++----- llvm/tools/llvm-extract/llvm-extract.cpp | 4 +++- 4 files changed, 13 
insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/BlockExtractor.h b/llvm/include/llvm/Transforms/IPO/BlockExtractor.h index 6211027bd672a..e62af105e283d 100644 --- a/llvm/include/llvm/Transforms/IPO/BlockExtractor.h +++ b/llvm/include/llvm/Transforms/IPO/BlockExtractor.h @@ -24,12 +24,13 @@ class BasicBlock; struct BlockExtractorPass : PassInfoMixin { BlockExtractorPass(std::vector> &&GroupsOfBlocks, - bool EraseFunctions); + bool EraseFunctions, bool KeepOldBlocks); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: std::vector> GroupsOfBlocks; bool EraseFunctions; + bool KeepOldBlocks; }; } // namespace llvm diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 4aec1b85c27ba..b6da340743228 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -56,7 +56,7 @@ MODULE_PASS("deadargelim", DeadArgumentEliminationPass()) MODULE_PASS("debugify", NewPMDebugifyPass()) MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass()) MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass()) -MODULE_PASS("extract-blocks", BlockExtractorPass({}, false)) +MODULE_PASS("extract-blocks", BlockExtractorPass({}, false, false)) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) MODULE_PASS("function-import", FunctionImportPass()) MODULE_PASS("globaldce", GlobalDCEPass()) diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 559e3f4ae9da2..dab242d5ed015 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -43,7 +43,7 @@ static cl::opt namespace { class BlockExtractor { public: - BlockExtractor(bool EraseFunctions, bool KeepOldBlocks = false) + BlockExtractor(bool EraseFunctions, bool KeepOldBlocks) : EraseFunctions(EraseFunctions), KeepOldBlocks(KeepOldBlocks) {} bool runOnModule(Module &M); void @@ -183,7 +183,8 @@ bool BlockExtractor::runOnModule(Module &M) { /* 
AllowVarArgs */ false, /* AllowAlloca */ false, /* AllocationBlock */ nullptr, - /* Suffix */ "", KeepOldBlocks) + /* Suffix */ "", + /* KeepOldBlocks */ KeepOldBlocks) .extractCodeRegion(CEAC); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() @@ -211,12 +212,13 @@ bool BlockExtractor::runOnModule(Module &M) { BlockExtractorPass::BlockExtractorPass( std::vector> &&GroupsOfBlocks, - bool EraseFunctions) - : GroupsOfBlocks(GroupsOfBlocks), EraseFunctions(EraseFunctions) {} + bool EraseFunctions, bool KeepOldBlocks) + : GroupsOfBlocks(GroupsOfBlocks), EraseFunctions(EraseFunctions), + KeepOldBlocks(KeepOldBlocks) {} PreservedAnalyses BlockExtractorPass::run(Module &M, ModuleAnalysisManager &AM) { - BlockExtractor BE(EraseFunctions); + BlockExtractor BE(EraseFunctions, KeepOldBlocks); BE.init(GroupsOfBlocks); return BE.runOnModule(M) ? PreservedAnalyses::none() : PreservedAnalyses::all(); diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index 3f8988e5e6863..410694c7a0e65 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -392,7 +392,9 @@ int main(int argc, char **argv) { PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); ModulePassManager PM; - PM.addPass(BlockExtractorPass(std::move(GroupOfBBs), true)); + PM.addPass(BlockExtractorPass(std::move(GroupOfBBs), + /*EraseFunction=*/!ReplaceWithCall, + /*KeepOldBlocks=*/ReplaceWithCall)); PM.run(*M, MAM); } From fc4ec3402313bdb6669877f433e39d858dfb7f93 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 3 Nov 2024 22:43:53 +0100 Subject: [PATCH 123/130] opaque ptr --- .../llvm-extract/extract-block-cleanup.ll | 18 +-- .../extract-block-multiple-exits.ll | 136 +++++++++--------- .../tools/llvm-extract/extract-block-sink.ll | 39 +++-- 3 files changed, 91 insertions(+), 102 deletions(-) diff --git a/llvm/test/tools/llvm-extract/extract-block-cleanup.ll 
b/llvm/test/tools/llvm-extract/extract-block-cleanup.ll index 3c2181c249640..8b44645c4149a 100644 --- a/llvm/test/tools/llvm-extract/extract-block-cleanup.ll +++ b/llvm/test/tools/llvm-extract/extract-block-cleanup.ll @@ -1,23 +1,23 @@ ; RUN: llvm-extract -S -bb "foo:region_start;extractonly;cleanup;fallback;region_end" --replace-with-call %s | FileCheck %s -; CHECK-LABEL: define void @foo(i32* %arg, i1 %c) { +; CHECK-LABEL: define void @foo(ptr %arg, i1 %c) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 %c, label %codeRepl, label %outsideonly ; CHECK-EMPTY: ; CHECK-NEXT: outsideonly: -; CHECK-NEXT: store i32 0, i32* %arg, align 4 +; CHECK-NEXT: store i32 0, ptr %arg, align 4 ; CHECK-NEXT: br label %cleanup ; CHECK-EMPTY: ; CHECK-NEXT: codeRepl: -; CHECK-NEXT: %targetBlock = call i1 @foo.region_start(i32* %arg) +; CHECK-NEXT: %targetBlock = call i1 @foo.region_start(ptr %arg) ; CHECK-NEXT: br i1 %targetBlock, label %cleanup.return_crit_edge, label %region_end.split ; CHECK-EMPTY: ; CHECK-NEXT: region_start: ; CHECK-NEXT: br label %extractonly ; CHECK-EMPTY: ; CHECK-NEXT: extractonly: -; CHECK-NEXT: store i32 1, i32* %arg, align 4 +; CHECK-NEXT: store i32 1, ptr %arg, align 4 ; CHECK-NEXT: br label %cleanup ; CHECK-EMPTY: ; CHECK-NEXT: cleanup: @@ -47,7 +47,7 @@ ; CHECK-NEXT: } -; CHECK-LABEL: define internal i1 @foo.region_start(i32* %arg) { +; CHECK-LABEL: define internal i1 @foo.region_start(ptr %arg) { ; CHECK-NEXT: newFuncRoot: ; CHECK-NEXT: br label %region_start ; CHECK-EMPTY: @@ -55,7 +55,7 @@ ; CHECK-NEXT: br label %extractonly ; CHECK-EMPTY: ; CHECK-NEXT: extractonly: -; CHECK-NEXT: store i32 1, i32* %arg, align 4 +; CHECK-NEXT: store i32 1, ptr %arg, align 4 ; CHECK-NEXT: br label %cleanup ; CHECK-EMPTY: ; CHECK-NEXT: cleanup: @@ -80,19 +80,19 @@ -define void @foo(i32* %arg, i1 %c) { +define void @foo(ptr %arg, i1 %c) { entry: br i1 %c, label %region_start, label %outsideonly outsideonly: - store i32 0, i32* %arg, align 4 + store i32 0, ptr %arg, align 4 br 
label %cleanup region_start: br label %extractonly extractonly: - store i32 1, i32* %arg, align 4 + store i32 1, ptr %arg, align 4 br label %cleanup cleanup: diff --git a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll index 90799593237b6..2b907659d7237 100644 --- a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll +++ b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll @@ -1,7 +1,7 @@ ; RUN: llvm-extract -S -bb "func:region_start;exiting0;exiting1" --replace-with-call %s | FileCheck %s -; CHECK-LABEL: define void @func(i32* %arg, i1 %c0, i1 %c1, i1 %c2, i8 %dest) { +; CHECK-LABEL: define void @func(ptr %arg, i1 %c0, i1 %c1, i1 %c2, i8 %dest) { ; CHECK-NEXT: entry: ; CHECK-NEXT: %B.ce.loc = alloca i32, align 4 ; CHECK-NEXT: %c.loc = alloca i32, align 4 @@ -9,24 +9,20 @@ ; CHECK-NEXT: %a.loc = alloca i32, align 4 ; CHECK-NEXT: br i1 %c0, label %codeRepl, label %exit ; CHECK-EMPTY: -; CHECK-NEXT: codeRepl: -; CHECK-NEXT: %lt.cast = bitcast i32* %a.loc to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast) -; CHECK-NEXT: %lt.cast1 = bitcast i32* %b.loc to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast1) -; CHECK-NEXT: %lt.cast2 = bitcast i32* %c.loc to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast2) -; CHECK-NEXT: %lt.cast3 = bitcast i32* %B.ce.loc to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast3) -; CHECK-NEXT: %targetBlock = call i16 @func.region_start(i1 %c1, i1 %c2, i8 %dest, i32* %a.loc, i32* %b.loc, i32* %c.loc, i32* %B.ce.loc) -; CHECK-NEXT: %a.reload = load i32, i32* %a.loc, align 4 -; CHECK-NEXT: %b.reload = load i32, i32* %b.loc, align 4 -; CHECK-NEXT: %c.reload = load i32, i32* %c.loc, align 4 -; CHECK-NEXT: %B.ce.reload = load i32, i32* %B.ce.loc, align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast) -; CHECK-NEXT: call 
void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast1) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast2) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast3) +; CHECK-NEXT: codeRepl: +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr %a.loc) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr %b.loc) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr %c.loc) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr %B.ce.loc) +; CHECK-NEXT: %targetBlock = call i16 @func.region_start(i1 %c1, i1 %c2, i8 %dest, ptr %a.loc, ptr %b.loc, ptr %c.loc, ptr %B.ce.loc) +; CHECK-NEXT: %a.reload = load i32, ptr %a.loc, align 4 +; CHECK-NEXT: %b.reload = load i32, ptr %b.loc, align 4 +; CHECK-NEXT: %c.reload = load i32, ptr %c.loc, align 4 +; CHECK-NEXT: %B.ce.reload = load i32, ptr %B.ce.loc, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr %a.loc) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr %b.loc) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr %c.loc) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr %B.ce.loc) ; CHECK-NEXT: switch i16 %targetBlock, label %exit0 [ ; CHECK-NEXT: i16 0, label %exiting0.exit_crit_edge ; CHECK-NEXT: i16 1, label %fallback @@ -34,19 +30,19 @@ ; CHECK-NEXT: i16 3, label %exit2 ; CHECK-NEXT: ] ; CHECK-EMPTY: -; CHECK-NEXT: region_start: +; CHECK-NEXT: region_start: ; CHECK-NEXT: %a = add i32 42, 1 ; CHECK-NEXT: br i1 %c1, label %exiting0, label %exiting1 ; CHECK-EMPTY: -; CHECK-NEXT: exiting0: +; CHECK-NEXT: exiting0: ; CHECK-NEXT: %b = add i32 42, 2 ; CHECK-NEXT: br i1 %c2, label %exiting0.exit_crit_edge, label %exit0.split ; CHECK-EMPTY: -; CHECK-NEXT: exiting0.exit_crit_edge: -; CHECK-NEXT: %b.merge_with_extracted7 = phi i32 [ %b.reload, %codeRepl ], [ %b, %exiting0 ] +; CHECK-NEXT: exiting0.exit_crit_edge: +; CHECK-NEXT: %b.merge_with_extracted4 = phi i32 [ %b.reload, %codeRepl ], [ %b, %exiting0 ] ; CHECK-NEXT: br label 
%exit ; CHECK-EMPTY: -; CHECK-NEXT: exiting1: +; CHECK-NEXT: exiting1: ; CHECK-NEXT: %c = add i32 42, 3 ; CHECK-NEXT: switch i8 %dest, label %fallback [ ; CHECK-NEXT: i8 0, label %exit0.split @@ -55,67 +51,67 @@ ; CHECK-NEXT: i8 3, label %exit0.split ; CHECK-NEXT: ] ; CHECK-EMPTY: -; CHECK-NEXT: fallback: +; CHECK-NEXT: fallback: ; CHECK-NEXT: unreachable ; CHECK-EMPTY: -; CHECK-NEXT: exit: -; CHECK-NEXT: %A = phi i32 [ 42, %entry ], [ %b.merge_with_extracted7, %exiting0.exit_crit_edge ] -; CHECK-NEXT: store i32 %A, i32* %arg, align 4 +; CHECK-NEXT: exit: +; CHECK-NEXT: %A = phi i32 [ 42, %entry ], [ %b.merge_with_extracted4, %exiting0.exit_crit_edge ] +; CHECK-NEXT: store i32 %A, ptr %arg, align 4 ; CHECK-NEXT: br label %return ; CHECK-EMPTY: -; CHECK-NEXT: exit0.split: -; CHECK-NEXT: %b.merge_with_extracted6 = phi i32 [ %b, %exiting0 ], [ undef, %exiting1 ], [ undef, %exiting1 ] +; CHECK-NEXT: exit0.split: +; CHECK-NEXT: %b.merge_with_extracted3 = phi i32 [ %b, %exiting0 ], [ undef, %exiting1 ], [ undef, %exiting1 ] ; CHECK-NEXT: %B.ce = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ], [ %a, %exiting1 ] ; CHECK-NEXT: br label %exit0 ; CHECK-EMPTY: -; CHECK-NEXT: exit0: +; CHECK-NEXT: exit0: ; CHECK-NEXT: %B.ce.merge_with_extracted = phi i32 [ %B.ce.reload, %codeRepl ], [ %B.ce, %exit0.split ] -; CHECK-NEXT: %b.merge_with_extracted = phi i32 [ %b.reload, %codeRepl ], [ %b.merge_with_extracted6, %exit0.split ] -; CHECK-NEXT: %a.merge_with_extracted5 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exit0.split ] -; CHECK-NEXT: store i32 %a.merge_with_extracted5, i32* %arg, align 4 -; CHECK-NEXT: store i32 %B.ce.merge_with_extracted, i32* %arg, align 4 +; CHECK-NEXT: %b.merge_with_extracted = phi i32 [ %b.reload, %codeRepl ], [ %b.merge_with_extracted3, %exit0.split ] +; CHECK-NEXT: %a.merge_with_extracted2 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exit0.split ] +; CHECK-NEXT: store i32 %a.merge_with_extracted2, ptr %arg, align 4 +; CHECK-NEXT: store i32 
%B.ce.merge_with_extracted, ptr %arg, align 4 ; CHECK-NEXT: br label %after ; CHECK-EMPTY: -; CHECK-NEXT: exit1: -; CHECK-NEXT: %c.merge_with_extracted8 = phi i32 [ %c.reload, %codeRepl ], [ %c, %exiting1 ] -; CHECK-NEXT: %a.merge_with_extracted4 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exiting1 ] +; CHECK-NEXT: exit1: +; CHECK-NEXT: %c.merge_with_extracted5 = phi i32 [ %c.reload, %codeRepl ], [ %c, %exiting1 ] +; CHECK-NEXT: %a.merge_with_extracted1 = phi i32 [ %a.reload, %codeRepl ], [ %a, %exiting1 ] ; CHECK-NEXT: br label %after ; CHECK-EMPTY: -; CHECK-NEXT: exit2: +; CHECK-NEXT: exit2: ; CHECK-NEXT: %c.merge_with_extracted = phi i32 [ %c.reload, %codeRepl ], [ %c, %exiting1 ] -; CHECK-NEXT: store i32 %c.merge_with_extracted, i32* %arg, align 4 -; CHECK-NEXT: store i32 %c.merge_with_extracted, i32* %arg, align 4 +; CHECK-NEXT: store i32 %c.merge_with_extracted, ptr %arg, align 4 +; CHECK-NEXT: store i32 %c.merge_with_extracted, ptr %arg, align 4 ; CHECK-NEXT: br label %return ; CHECK-EMPTY: -; CHECK-NEXT: after: -; CHECK-NEXT: %a.merge_with_extracted = phi i32 [ %a.merge_with_extracted5, %exit0 ], [ %a.merge_with_extracted4, %exit1 ] -; CHECK-NEXT: %D = phi i32 [ %b.merge_with_extracted, %exit0 ], [ %c.merge_with_extracted8, %exit1 ] -; CHECK-NEXT: store i32 %a.merge_with_extracted, i32* %arg, align 4 -; CHECK-NEXT: store i32 %D, i32* %arg, align 4 +; CHECK-NEXT: after: +; CHECK-NEXT: %a.merge_with_extracted = phi i32 [ %a.merge_with_extracted2, %exit0 ], [ %a.merge_with_extracted1, %exit1 ] +; CHECK-NEXT: %D = phi i32 [ %b.merge_with_extracted, %exit0 ], [ %c.merge_with_extracted5, %exit1 ] +; CHECK-NEXT: store i32 %a.merge_with_extracted, ptr %arg, align 4 +; CHECK-NEXT: store i32 %D, ptr %arg, align 4 ; CHECK-NEXT: br label %return ; CHECK-EMPTY: -; CHECK-NEXT: return: +; CHECK-NEXT: return: ; CHECK-NEXT: ret void ; CHECK-NEXT: } -; CHECK-LABEL: define internal i16 @func.region_start(i1 %c1, i1 %c2, i8 %dest, i32* %a.out, i32* %b.out, i32* %c.out, i32* 
%B.ce.out) { -; CHECK-NEXT: newFuncRoot: +; CHECK-LABEL: define internal i16 @func.region_start(i1 %c1, i1 %c2, i8 %dest, ptr %a.out, ptr %b.out, ptr %c.out, ptr %B.ce.out) { +; CHECK-NEXT: newFuncRoot: ; CHECK-NEXT: br label %region_start ; CHECK-EMPTY: -; CHECK-NEXT: region_start: +; CHECK-NEXT: region_start: ; CHECK-NEXT: %a = add i32 42, 1 -; CHECK-NEXT: store i32 %a, i32* %a.out, align 4 +; CHECK-NEXT: store i32 %a, ptr %a.out, align 4 ; CHECK-NEXT: br i1 %c1, label %exiting0, label %exiting1 ; CHECK-EMPTY: -; CHECK-NEXT: exiting0: +; CHECK-NEXT: exiting0: ; CHECK-NEXT: %b = add i32 42, 2 -; CHECK-NEXT: store i32 %b, i32* %b.out, align 4 +; CHECK-NEXT: store i32 %b, ptr %b.out, align 4 ; CHECK-NEXT: br i1 %c2, label %exiting0.exit_crit_edge.exitStub, label %exit0.split ; CHECK-EMPTY: -; CHECK-NEXT: exiting1: +; CHECK-NEXT: exiting1: ; CHECK-NEXT: %c = add i32 42, 3 -; CHECK-NEXT: store i32 %c, i32* %c.out, align 4 +; CHECK-NEXT: store i32 %c, ptr %c.out, align 4 ; CHECK-NEXT: switch i8 %dest, label %fallback.exitStub [ ; CHECK-NEXT: i8 0, label %exit0.split ; CHECK-NEXT: i8 1, label %exit1.exitStub @@ -123,29 +119,29 @@ ; CHECK-NEXT: i8 3, label %exit0.split ; CHECK-NEXT: ] ; CHECK-EMPTY: -; CHECK-NEXT: exit0.split: +; CHECK-NEXT: exit0.split: ; CHECK-NEXT: %B.ce = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ], [ %a, %exiting1 ] -; CHECK-NEXT: store i32 %B.ce, i32* %B.ce.out, align 4 +; CHECK-NEXT: store i32 %B.ce, ptr %B.ce.out, align 4 ; CHECK-NEXT: br label %exit0.exitStub ; CHECK-EMPTY: -; CHECK-NEXT: exiting0.exit_crit_edge.exitStub: +; CHECK-NEXT: exiting0.exit_crit_edge.exitStub: ; CHECK-NEXT: ret i16 0 ; CHECK-EMPTY: -; CHECK-NEXT: fallback.exitStub: +; CHECK-NEXT: fallback.exitStub: ; CHECK-NEXT: ret i16 1 ; CHECK-EMPTY: -; CHECK-NEXT: exit1.exitStub: +; CHECK-NEXT: exit1.exitStub: ; CHECK-NEXT: ret i16 2 ; CHECK-EMPTY: -; CHECK-NEXT: exit2.exitStub: +; CHECK-NEXT: exit2.exitStub: ; CHECK-NEXT: ret i16 3 ; CHECK-EMPTY: -; CHECK-NEXT: exit0.exitStub: +; 
CHECK-NEXT: exit0.exitStub: ; CHECK-NEXT: ret i16 4 ; CHECK-NEXT: } -define void @func(i32* %arg, i1 %c0, i1 %c1, i1 %c2, i8 %dest) { +define void @func(ptr %arg, i1 %c0, i1 %c1, i1 %c2, i8 %dest) { entry: br i1 %c0, label %region_start, label %exit @@ -171,13 +167,13 @@ fallback: exit: %A = phi i32 [ 42, %entry ], [ %b, %exiting0 ] - store i32 %A, i32* %arg + store i32 %A, ptr %arg br label %return exit0: %B = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ] , [ %a, %exiting1 ] - store i32 %a, i32* %arg - store i32 %B, i32* %arg + store i32 %a, ptr %arg + store i32 %B, ptr %arg br label %after exit1: @@ -185,14 +181,14 @@ exit1: exit2: %C = phi i32 [ %c, %exiting1 ] - store i32 %c, i32* %arg - store i32 %C, i32* %arg + store i32 %c, ptr %arg + store i32 %C, ptr %arg br label %return after: %D = phi i32 [ %b, %exit0 ], [ %c, %exit1 ] - store i32 %a, i32* %arg - store i32 %D, i32* %arg + store i32 %a, ptr %arg + store i32 %D, ptr %arg br label %return return: diff --git a/llvm/test/tools/llvm-extract/extract-block-sink.ll b/llvm/test/tools/llvm-extract/extract-block-sink.ll index c6f33757b820a..66e0b4a7799f4 100644 --- a/llvm/test/tools/llvm-extract/extract-block-sink.ll +++ b/llvm/test/tools/llvm-extract/extract-block-sink.ll @@ -1,7 +1,5 @@ ; RUN: llvm-extract -S -bb "foo:region_start" --replace-with-call %s | FileCheck %s - - ; CHECK-LABEL: define void @foo() { ; CHECK-NEXT: entry: ; CHECK-NEXT: %a = alloca i32, align 4 @@ -9,14 +7,14 @@ ; CHECK-NEXT: br label %codeRepl ; CHECK-EMPTY: ; CHECK-NEXT: codeRepl: -; CHECK-NEXT: call void @foo.region_start(i32* %b) +; CHECK-NEXT: call void @foo.region_start(ptr %b) ; CHECK-NEXT: br label %return ; CHECK-EMPTY: ; CHECK-NEXT: region_start: -; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %a) -; CHECK-NEXT: store i32 43, i32* %a, align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %a) -; CHECK-NEXT: store i32 44, i32* %b, align 4 +; CHECK-NEXT: call void 
@llvm.lifetime.start.p0(i64 4, ptr nonnull %a) +; CHECK-NEXT: store i32 43, ptr %a, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %a) +; CHECK-NEXT: store i32 44, ptr %b, align 4 ; CHECK-NEXT: br label %return ; CHECK-EMPTY: ; CHECK-NEXT: return: @@ -24,16 +22,16 @@ ; CHECK-NEXT: } -; CHECK-LABEL: define internal void @foo.region_start(i32* %b) { +; CHECK-LABEL: define internal void @foo.region_start(ptr %b) { ; CHECK-NEXT: newFuncRoot: ; CHECK-NEXT: %a = alloca i32, align 4 ; CHECK-NEXT: br label %region_start ; CHECK-EMPTY: ; CHECK-NEXT: region_start: -; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %a) -; CHECK-NEXT: store i32 43, i32* %a, align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %a) -; CHECK-NEXT: store i32 44, i32* %b, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %a) +; CHECK-NEXT: store i32 43, ptr %a, align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %a) +; CHECK-NEXT: store i32 44, ptr %b, align 4 ; CHECK-NEXT: br label %return.exitStub ; CHECK-EMPTY: ; CHECK-NEXT: return.exitStub: @@ -41,13 +39,8 @@ ; CHECK-NEXT: } - - - - -declare void @llvm.lifetime.start.p0i32(i64, i32* nocapture) -declare void @llvm.lifetime.end.p0i32(i64, i32* nocapture) - +declare void @llvm.lifetime.start.p0i32(i64, ptr nocapture) +declare void @llvm.lifetime.end.p0i32(i64, ptr nocapture) define void @foo() { entry: @@ -56,10 +49,10 @@ entry: br label %region_start region_start: - call void @llvm.lifetime.start.p0i32(i64 4, i32* nonnull %a) - store i32 43, i32* %a - call void @llvm.lifetime.end.p0i32(i64 4, i32* nonnull %a) - store i32 44, i32* %b + call void @llvm.lifetime.start.p0i32(i64 4, ptr nonnull %a) + store i32 43, ptr %a + call void @llvm.lifetime.end.p0i32(i64 4, ptr nonnull %a) + store i32 44, ptr %b br label %return return: From c7170c73e8c560d5f141b43554bfb5ef8dc74f7e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 3 Nov 
2024 23:10:40 +0100 Subject: [PATCH 124/130] clang-format --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index d0a9d0e3cc64b..5ac13cecaec55 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1337,19 +1337,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, normalizeCFGForExtraction(header); - if (!KeepOldBlocks) { - // Remove CondGuardInsts that will be moved to the new function from the old - // function's assumption cache. - for (BasicBlock *Block : Blocks) { - for (Instruction &I : llvm::make_early_inc_range(*Block)) { - if (auto *CI = dyn_cast(&I)) { - if (AC) - AC->unregisterAssumption(CI); - CI->eraseFromParent(); + if (!KeepOldBlocks) { + // Remove CondGuardInsts that will be moved to the new function from the old + // function's assumption cache. 
+ for (BasicBlock *Block : Blocks) { + for (Instruction &I : llvm::make_early_inc_range(*Block)) { + if (auto *CI = dyn_cast(&I)) { + if (AC) + AC->unregisterAssumption(CI); + CI->eraseFromParent(); + } } } } - } ValueSet SinkingCands, HoistingCands; BasicBlock *CommonExit = nullptr; From 1839edc9e725dad39998acff151d5870b3a03a13 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 3 Nov 2024 23:19:05 +0100 Subject: [PATCH 125/130] clang-format --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index ca578273c6d8b..9889148cbf342 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1338,18 +1338,18 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, normalizeCFGForExtraction(header); if (!KeepOldBlocks) { - // Remove @llvm.assume calls that will be moved to the new function from the - // old function's assumption cache. - for (BasicBlock *Block : Blocks) { - for (Instruction &I : llvm::make_early_inc_range(*Block)) { - if (auto *AI = dyn_cast(&I)) { - if (AC) - AC->unregisterAssumption(AI); - AI->eraseFromParent(); + // Remove @llvm.assume calls that will be moved to the new function from the + // old function's assumption cache. 
+ for (BasicBlock *Block : Blocks) { + for (Instruction &I : llvm::make_early_inc_range(*Block)) { + if (auto *AI = dyn_cast(&I)) { + if (AC) + AC->unregisterAssumption(AI); + AI->eraseFromParent(); + } } } } - } ValueSet SinkingCands, HoistingCands; BasicBlock *CommonExit = nullptr; From 9767179ebee751cc9cc80d3f66f2fd2b7701dd4b Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 3 Nov 2024 23:41:53 +0100 Subject: [PATCH 126/130] clang-format --- .../llvm/Transforms/Utils/CodeExtractor.h | 9 ++--- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 33 ++++++++++--------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 097f3325b7ca0..734f5f442961c 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -149,10 +149,11 @@ class CodeExtractorAnalysisCache { CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr, - AssumptionCache *AC = nullptr, bool AllowVarArgs = false, - bool AllowAlloca = false, - BasicBlock *AllocationBlock = nullptr, - std::string Suffix = "", bool ArgsInZeroAddressSpace = false, bool KeepOldBlocks = false); + AssumptionCache *AC = nullptr, bool AllowVarArgs = false, + bool AllowAlloca = false, + BasicBlock *AllocationBlock = nullptr, + std::string Suffix = "", bool ArgsInZeroAddressSpace = false, + bool KeepOldBlocks = false); /// Create a code extractor for a loop body. 
/// diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 7654744aedaf1..a8e549788fd7e 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -251,14 +251,15 @@ buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, - bool AllowVarArgs, bool AllowAlloca, - BasicBlock *AllocationBlock, std::string Suffix, bool KeepOldBlocks, - bool ArgsInZeroAddressSpace) - : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllocationBlock(AllocationBlock), - AllowVarArgs(AllowVarArgs), KeepOldBlocks(KeepOldBlocks), - Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca,KeepOldBlocks)), - Suffix(Suffix), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace) {} + bool AllowVarArgs, bool AllowAlloca, + BasicBlock *AllocationBlock, std::string Suffix, + bool KeepOldBlocks, bool ArgsInZeroAddressSpace) + : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), + BPI(BPI), AC(AC), AllocationBlock(AllocationBlock), + AllowVarArgs(AllowVarArgs), KeepOldBlocks(KeepOldBlocks), + Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca, + KeepOldBlocks)), + Suffix(Suffix), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, @@ -1870,14 +1871,14 @@ CallInst *CodeExtractor::emitReplacerCall( Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg", &*AllocaBlock->getFirstInsertionPt()); - if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) { - auto *StructSpaceCast = new AddrSpaceCastInst( - Struct, PointerType ::get(Context, 0), "structArg.ascast"); - StructSpaceCast->insertAfter(Struct); - params.push_back(StructSpaceCast); - } else 
{ - params.push_back(Struct); - } + if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) { + auto *StructSpaceCast = new AddrSpaceCastInst( + Struct, PointerType ::get(Context, 0), "structArg.ascast"); + StructSpaceCast->insertAfter(Struct); + params.push_back(StructSpaceCast); + } else { + params.push_back(Struct); + } unsigned AggIdx = 0; for (Value *input : inputs) { From c6a0ea1c6e07b082cf682b514eab66a099aa7e46 Mon Sep 17 00:00:00 2001 From: "U-BERGUFFLEN\\meinersbur" Date: Mon, 4 Nov 2024 02:38:47 +0100 Subject: [PATCH 127/130] Test fix --- llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll index 2b907659d7237..b7475aebd7770 100644 --- a/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll +++ b/llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll @@ -60,7 +60,7 @@ ; CHECK-NEXT: br label %return ; CHECK-EMPTY: ; CHECK-NEXT: exit0.split: -; CHECK-NEXT: %b.merge_with_extracted3 = phi i32 [ %b, %exiting0 ], [ undef, %exiting1 ], [ undef, %exiting1 ] +; CHECK-NEXT: %b.merge_with_extracted3 = phi i32 [ %b, %exiting0 ], [ poison, %exiting1 ], [ poison, %exiting1 ] ; CHECK-NEXT: %B.ce = phi i32 [ %b, %exiting0 ], [ %a, %exiting1 ], [ %a, %exiting1 ] ; CHECK-NEXT: br label %exit0 ; CHECK-EMPTY: From 1234ca64b63c925ae804dbe2afdbeb2bc134a9be Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 4 Nov 2024 13:13:02 +0100 Subject: [PATCH 128/130] clang-format --- .../llvm/Transforms/Utils/CodeExtractor.h | 7 +++---- llvm/lib/Transforms/IPO/BlockExtractor.cpp | 2 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 18 ++++++++++-------- .../Transforms/Utils/CodeExtractorTest.cpp | 3 --- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h 
index 716a93a2442aa..734f5f442961c 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -151,10 +151,9 @@ class CodeExtractorAnalysisCache { BranchProbabilityInfo *BPI = nullptr, AssumptionCache *AC = nullptr, bool AllowVarArgs = false, bool AllowAlloca = false, - BasicBlock *AllocationBlock = nullptr, std::string Suffix = "" - , bool ArgsInZeroAddressSpace = false - , bool KeepOldBlocks = false - ); + BasicBlock *AllocationBlock = nullptr, + std::string Suffix = "", bool ArgsInZeroAddressSpace = false, + bool KeepOldBlocks = false); /// Create a code extractor for a loop body. /// diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index aed99a0aa3e5a..d05b0009bf9dd 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -182,7 +182,7 @@ bool BlockExtractor::runOnModule(Module &M) { /* AllowAlloca */ false, /* AllocationBlock */ nullptr, /* Suffix */ "", - /* ArgsInZeroAddressSpace */ false, + /* ArgsInZeroAddressSpace */ false, /* KeepOldBlocks */ KeepOldBlocks) .extractCodeRegion(CEAC); if (F) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 0d8753fbe65da..0a3a41dadec0a 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -251,24 +251,28 @@ buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, - bool AllowVarArgs, bool AllowAlloca, BasicBlock *AllocationBlock, std::string Suffix, bool ArgsInZeroAddressSpace, bool KeepOldBlocks) + bool AllowVarArgs, bool AllowAlloca, + BasicBlock *AllocationBlock, std::string Suffix, + bool ArgsInZeroAddressSpace, bool KeepOldBlocks) : DT(DT), AggregateArgs(AggregateArgs || 
AggregateArgsOpt), BFI(BFI), BPI(BPI), AC(AC), AllocationBlock(AllocationBlock), - AllowVarArgs(AllowVarArgs), KeepOldBlocks(KeepOldBlocks), - Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca, KeepOldBlocks)), - Suffix(Suffix),ArgsInZeroAddressSpace(ArgsInZeroAddressSpace) {} + AllowVarArgs(AllowVarArgs), KeepOldBlocks(KeepOldBlocks), + Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca, + KeepOldBlocks)), + Suffix(Suffix), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, std::string Suffix) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllocationBlock(nullptr), AllowVarArgs(false), KeepOldBlocks(false), + BPI(BPI), AC(AC), AllocationBlock(nullptr), AllowVarArgs(false), + KeepOldBlocks(false), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, /* AllowVarArgs */ false, /* AllowAlloca */ false, /* KeepOldBlocks */ false)), - Suffix(Suffix), ArgsInZeroAddressSpace(false) {} + Suffix(Suffix), ArgsInZeroAddressSpace(false) {} /// definedInRegion - Return true if the specified value is defined in the /// extracted region. 
@@ -1118,8 +1122,6 @@ static void insertLifetimeMarkersSurroundingCall( } } - - void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function *oldFunc = Blocks.front()->getParent(); auto newFuncIt = newFunction->begin(); diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp index 804c1533db68e..a48727f1e00d3 100644 --- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -574,7 +574,6 @@ TEST(CodeExtractor, PartialAggregateArgs) { EXPECT_FALSE(verifyFunction(*Func)); } - TEST(CodeExtractor, AllocaBlock) { LLVMContext Ctx; SMDiagnostic Err; @@ -626,8 +625,6 @@ TEST(CodeExtractor, AllocaBlock) { EXPECT_EQ(NumAllocas, 2); } - - TEST(CodeExtractor, OpenMPAggregateArgs) { LLVMContext Ctx; SMDiagnostic Err; From df93735c1b629a7446108a13d9ec484ae69fe57d Mon Sep 17 00:00:00 2001 From: "U-BERGUFFLEN\\meinersbur" Date: Mon, 4 Nov 2024 13:26:04 +0100 Subject: [PATCH 129/130] post-merge fix --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index a41d2ec4cc224..b6906ab1f2207 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -260,19 +260,6 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, KeepOldBlocks)), Suffix(Suffix), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace) {} -CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, - BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI, AssumptionCache *AC, - std::string Suffix) - : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllocationBlock(nullptr), AllowVarArgs(false), - KeepOldBlocks(false), - Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, - /* AllowVarArgs */ false, - /* AllowAlloca */ false, - 
/* KeepOldBlocks */ false)), - Suffix(Suffix), ArgsInZeroAddressSpace(false) {} - /// definedInRegion - Return true if the specified value is defined in the /// extracted region. static bool definedInRegion(const SetVector &Blocks, Value *V) { From 394703159e1a5fbbc5177ea84df72d6ffacbcb65 Mon Sep 17 00:00:00 2001 From: "U-BERGUFFLEN\\meinersbur" Date: Mon, 4 Nov 2024 14:05:59 +0100 Subject: [PATCH 130/130] post-merge fix --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index beacce4a56868..8dc4a2e3fd348 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -811,21 +811,16 @@ Function *CodeExtractor::constructFunctionDeclaration( // Assemble the function's parameter lists. std::vector ParamTy; std::vector AggParamTy; - std::vector> NumberedInputs; - std::vector> NumberedOutputs; const DataLayout &DL = M->getDataLayout(); // Add the types of the input values to the function's argument list - unsigned ArgNum = 0; for (Value *value : inputs) { LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); if (AggregateArgs && !ExcludeArgsFromAggregate.contains(value)) { AggParamTy.push_back(value->getType()); StructValues.insert(value); - } else { + } else ParamTy.push_back(value->getType()); - NumberedInputs.emplace_back(ArgNum++, value); - } } // Add the types of the output values to the function's argument list. @@ -834,11 +829,9 @@ Function *CodeExtractor::constructFunctionDeclaration( if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) { AggParamTy.push_back(output->getType()); StructValues.insert(output); - } else { + } else ParamTy.push_back( PointerType::get(output->getType(), DL.getAllocaAddrSpace())); - NumberedOutputs.emplace_back(ArgNum++, output); - } } assert(