Skip to content

Commit 6fb040d

Browse files
Merge branch 'main' into cir-shuffle-codegen
2 parents 3c14ebd + cd3192a commit 6fb040d

File tree

385 files changed

+17159
-3388
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

385 files changed

+17159
-3388
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,11 @@ class MCPlusBuilder {
538538
llvm_unreachable("not implemented");
539539
}
540540

541+
/// Populate \p Inst with a direct branch to \p Target. This default
/// implementation only reports "not implemented" via llvm_unreachable, so a
/// target-specific builder has to override it before it can be called.
virtual void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
542+
MCContext *Ctx) {
543+
llvm_unreachable("not implemented");
544+
}
545+
541546
virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
542547

543548
virtual unsigned getShortBranchOpcode(unsigned Opcode) const {

bolt/lib/Passes/Inliner.cpp

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -491,32 +491,6 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
491491
}
492492
}
493493

494-
// AArch64 BTI:
495-
// If the callee has an indirect tailcall (BR), we would transform it to
496-
// an indirect call (BLR) in InlineCall. Because of this, we would have to
497-
// update the BTI at the target of the tailcall. However, these targets
498-
// are not known. Instead, we skip inlining blocks with indirect
499-
// tailcalls.
500-
auto HasIndirectTailCall = [&](const BinaryFunction &BF) -> bool {
501-
for (const auto &BB : BF) {
502-
for (const auto &II : BB) {
503-
if (BC.MIB->isIndirectBranch(II) && BC.MIB->isTailCall(II)) {
504-
return true;
505-
}
506-
}
507-
}
508-
return false;
509-
};
510-
511-
if (BC.isAArch64() && BC.usesBTI() &&
512-
HasIndirectTailCall(*TargetFunction)) {
513-
++InstIt;
514-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Skipping inlining block with tailcall"
515-
<< " in " << Function << " : " << BB->getName()
516-
<< " to keep BTIs consistent.\n");
517-
continue;
518-
}
519-
520494
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
521495
<< " in " << Function << " : " << BB->getName()
522496
<< ". Count: " << BB->getKnownExecutionCount()

bolt/lib/Passes/Instrumentation.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,9 +305,12 @@ void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
305305
: IndCallHandlerExitBBFunction->getSymbol(),
306306
IndCallSiteID, &*BC.Ctx);
307307

308-
Iter = BB.eraseInstruction(Iter);
309-
Iter = insertInstructions(CounterInstrs, BB, Iter);
310-
--Iter;
308+
if (!BC.isAArch64()) {
309+
Iter = BB.eraseInstruction(Iter);
310+
Iter = insertInstructions(CounterInstrs, BB, Iter);
311+
--Iter;
312+
} else
313+
Iter = insertInstructions(CounterInstrs, BB, Iter);
311314
}
312315

313316
bool Instrumentation::instrumentOneTarget(

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 86 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,14 @@ static cl::opt<bool> NoLSEAtomics(
4848

4949
namespace {
5050

51-
// Rewrites Inst as an AArch64::MRS whose operands are RegName followed by the
// NZCV system register, i.e. `mrs RegName, nzcv`.
// The added signature line carries [[maybe_unused]]; the call site visible in
// this diff is on a removed line.
static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
51+
[[maybe_unused]] static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
5252
Inst.setOpcode(AArch64::MRS);
5353
Inst.clear();
5454
Inst.addOperand(MCOperand::createReg(RegName));
5555
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
5656
}
5757

58-
static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
58+
[[maybe_unused]] static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
5959
Inst.setOpcode(AArch64::MSR);
6060
Inst.clear();
6161
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
@@ -2114,6 +2114,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
21142114
convertJmpToTailCall(Inst);
21152115
}
21162116

2117+
/// AArch64 implementation of createDirectBranch: rewrites \p Inst as an
/// AArch64::B whose only operand is a symbol-reference expression for
/// \p Target, wrapped by getTargetExprFor.
/// \p Ctx is dereferenced unconditionally, so it must not be null.
void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
2118+
MCContext *Ctx) override {
2119+
Inst.setOpcode(AArch64::B);
2120+
Inst.clear();
2121+
// The branch destination is attached as an expression operand, not an
// immediate.
Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
2122+
Inst, MCSymbolRefExpr::create(Target, *Ctx), *Ctx, 0)));
2123+
}
2124+
21172125
bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
21182126
const MCSymbol *&TBB, const MCSymbol *&FBB,
21192127
MCInst *&CondBranch,
@@ -2471,21 +2479,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
24712479
}
24722480

24732481
/// Returns the instruction list for the exit block of the instrumented
/// indirect call handler.
/// Removed ('-') lines: a fixed five-instruction sequence that restored NZCV
/// from x1, popped x0/x1, loaded the next target into x16 and branched to it.
/// Added ('+') lines: a single instruction built by createReturn.
InstructionListType createInstrumentedIndCallHandlerExitBB() const override {
2474-
InstructionListType Insts(5);
24752482
// Code sequence for instrumented indirect call handler:
2476-
// msr nzcv, x1
2477-
// ldp x0, x1, [sp], #16
2478-
// ldr x16, [sp], #16
2479-
// ldp x0, x1, [sp], #16
2480-
// br x16
2481-
setSystemFlag(Insts[0], AArch64::X1);
2482-
createPopRegisters(Insts[1], AArch64::X0, AArch64::X1);
2483-
// Here we load address of the next function which should be called in the
2484-
// original binary to X16 register. Writing to X16 is permitted without
2485-
// needing to restore.
2486-
loadReg(Insts[2], AArch64::X16, AArch64::SP);
2487-
createPopRegisters(Insts[3], AArch64::X0, AArch64::X1);
2488-
createIndirectBranch(Insts[4], AArch64::X16, 0);
2483+
// ret
2484+
2485+
InstructionListType Insts;
2486+
2487+
Insts.emplace_back();
2488+
createReturn(Insts.back());
2489+
24892490
return Insts;
24902491
}
24912492

@@ -2561,39 +2562,59 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
25612562
MCSymbol *HandlerFuncAddr,
25622563
int CallSiteID,
25632564
MCContext *Ctx) override {
2564-
InstructionListType Insts;
25652565
// Code sequence used to enter indirect call instrumentation helper:
2566-
// stp x0, x1, [sp, #-16]! createPushRegisters
2567-
// mov target x0 convertIndirectCallToLoad -> orr x0 target xzr
2566+
// stp x0, x1, [sp, #-16]! createPushRegisters (1)
2567+
// mov x0, target convertIndirectCallToLoad -> orr x0 target xzr
25682568
// mov x1 CallSiteID createLoadImmediate ->
25692569
// movk x1, #0x0, lsl #48
25702570
// movk x1, #0x0, lsl #32
25712571
// movk x1, #0x0, lsl #16
25722572
// movk x1, #0x0
2573-
// stp x0, x1, [sp, #-16]!
2574-
// bl *HandlerFuncAddr createIndirectCall ->
2573+
// stp x0, x30, [sp, #-16]! (2)
25752574
// adr x0 *HandlerFuncAddr -> adrp + add
2576-
// blr x0
2575+
// blr x0 (__bolt_instr_ind_call_handler_func)
2576+
// ldp x0, x30, [sp], #16 (2)
2577+
// mov target, x0 ; move target address back to the register used by the call
2578+
// ldp x0, x1, [sp], #16 (1)
2579+
2580+
InstructionListType Insts;
25772581
Insts.emplace_back();
2578-
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2582+
createPushRegisters(Insts.back(), getIntArgRegister(0),
2583+
getIntArgRegister(1));
25792584
Insts.emplace_back(CallInst);
2580-
convertIndirectCallToLoad(Insts.back(), AArch64::X0);
2585+
convertIndirectCallToLoad(Insts.back(), getIntArgRegister(0));
25812586
InstructionListType LoadImm =
25822587
createLoadImmediate(getIntArgRegister(1), CallSiteID);
25832588
Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
25842589
Insts.emplace_back();
2585-
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2590+
createPushRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR);
25862591
Insts.resize(Insts.size() + 2);
2587-
InstructionListType Addr =
2588-
materializeAddress(HandlerFuncAddr, Ctx, AArch64::X0);
2592+
InstructionListType Addr = materializeAddress(
2593+
HandlerFuncAddr, Ctx, CallInst.getOperand(0).getReg());
25892594
assert(Addr.size() == 2 && "Invalid Addr size");
25902595
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
2596+
25912597
Insts.emplace_back();
2592-
createIndirectCallInst(Insts.back(), isTailCall(CallInst), AArch64::X0);
2598+
createIndirectCallInst(Insts.back(), false,
2599+
CallInst.getOperand(0).getReg());
25932600

2594-
// Carry over metadata including tail call marker if present.
2595-
stripAnnotations(Insts.back());
2596-
moveAnnotations(std::move(CallInst), Insts.back());
2601+
Insts.emplace_back();
2602+
createPopRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR);
2603+
2604+
// move x0 to indirect call register
2605+
Insts.emplace_back();
2606+
Insts.back().setOpcode(AArch64::ORRXrs);
2607+
Insts.back().insert(Insts.back().begin(),
2608+
MCOperand::createReg(CallInst.getOperand(0).getReg()));
2609+
Insts.back().insert(Insts.back().begin() + 1,
2610+
MCOperand::createReg(AArch64::XZR));
2611+
Insts.back().insert(Insts.back().begin() + 2,
2612+
MCOperand::createReg(getIntArgRegister(0)));
2613+
Insts.back().insert(Insts.back().begin() + 3, MCOperand::createImm(0));
2614+
2615+
Insts.emplace_back();
2616+
createPopRegisters(Insts.back(), getIntArgRegister(0),
2617+
getIntArgRegister(1));
25972618

25982619
return Insts;
25992620
}
@@ -2602,43 +2623,53 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
26022623
createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
26032624
const MCSymbol *IndCallHandler,
26042625
MCContext *Ctx) override {
2605-
// Code sequence used to check whether InstrTampoline was initialized
2626+
// Code sequence used to check whether InstrTrampoline was initialized
26062627
// and call it if so, returns via IndCallHandler
2607-
// stp x0, x1, [sp, #-16]!
2608-
// mrs x1, nzcv
2609-
// adr x0, InstrTrampoline -> adrp + add
2610-
// ldr x0, [x0]
2628+
// adrp x0, InstrTrampoline
2629+
// ldr x0, [x0, #lo12:InstrTrampoline]
26112630
// subs x0, x0, #0x0
26122631
// b.eq IndCallHandler
26132632
// str x30, [sp, #-16]!
26142633
// blr x0
26152634
// ldr x30, [sp], #16
26162635
// b IndCallHandler
26172636
InstructionListType Insts;
2637+
2638+
// load handler address
2639+
MCInst InstAdrp;
2640+
InstAdrp.setOpcode(AArch64::ADRP);
2641+
InstAdrp.addOperand(MCOperand::createReg(getIntArgRegister(0)));
2642+
InstAdrp.addOperand(MCOperand::createImm(0));
2643+
setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, InstrTrampoline,
2644+
/* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
2645+
Insts.emplace_back(InstAdrp);
2646+
2647+
MCInst InstLoad;
2648+
InstLoad.setOpcode(AArch64::LDRXui);
2649+
InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
2650+
InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
2651+
InstLoad.addOperand(MCOperand::createImm(0));
2652+
setOperandToSymbolRef(InstLoad, /* OpNum */ 2, InstrTrampoline,
2653+
/* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
2654+
Insts.emplace_back(InstLoad);
2655+
2656+
InstructionListType CmpJmp =
2657+
createCmpJE(getIntArgRegister(0), 0, IndCallHandler, Ctx);
2658+
Insts.insert(Insts.end(), CmpJmp.begin(), CmpJmp.end());
2659+
26182660
Insts.emplace_back();
2619-
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2620-
Insts.emplace_back();
2621-
getSystemFlag(Insts.back(), getIntArgRegister(1));
2622-
Insts.emplace_back();
2623-
Insts.emplace_back();
2624-
InstructionListType Addr =
2625-
materializeAddress(InstrTrampoline, Ctx, AArch64::X0);
2626-
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
2627-
assert(Addr.size() == 2 && "Invalid Addr size");
2628-
Insts.emplace_back();
2629-
loadReg(Insts.back(), AArch64::X0, AArch64::X0);
2630-
InstructionListType cmpJmp =
2631-
createCmpJE(AArch64::X0, 0, IndCallHandler, Ctx);
2632-
Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
2633-
Insts.emplace_back();
2634-
storeReg(Insts.back(), AArch64::LR, AArch64::SP);
2661+
storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
2662+
26352663
Insts.emplace_back();
26362664
Insts.back().setOpcode(AArch64::BLR);
2637-
Insts.back().addOperand(MCOperand::createReg(AArch64::X0));
2665+
Insts.back().addOperand(MCOperand::createReg(getIntArgRegister(0)));
2666+
26382667
Insts.emplace_back();
2639-
loadReg(Insts.back(), AArch64::LR, AArch64::SP);
2668+
loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
2669+
26402670
Insts.emplace_back();
2641-
createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
2671+
createDirectBranch(Insts.back(), IndCallHandler, Ctx);
2672+
26422673
return Insts;
26432674
}
26442675

bolt/runtime/instr.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1691,9 +1691,12 @@ instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
16911691
extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
16921692
{
16931693
#if defined(__aarch64__)
1694+
// the target address is placed on the stack
1695+
// the identifier of the indirect call site is placed in the X1 register
1696+
16941697
// clang-format off
16951698
__asm__ __volatile__(SAVE_ALL
1696-
"ldp x0, x1, [sp, #288]\n"
1699+
"ldr x0, [sp, #272]\n"
16971700
"bl instrumentIndirectCall\n"
16981701
RESTORE_ALL
16991702
"ret\n"
@@ -1728,9 +1731,12 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
17281731
extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
17291732
{
17301733
#if defined(__aarch64__)
1734+
// the target address is placed on the stack
1735+
// the identifier of the indirect call site is placed in the X1 register
1736+
17311737
// clang-format off
17321738
__asm__ __volatile__(SAVE_ALL
1733-
"ldp x0, x1, [sp, #288]\n"
1739+
"ldr x0, [sp, #272]\n"
17341740
"bl instrumentIndirectCall\n"
17351741
RESTORE_ALL
17361742
"ret\n"

bolt/runtime/sys_aarch64.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@
1818
"stp x24, x25, [sp, #-16]!\n" \
1919
"stp x26, x27, [sp, #-16]!\n" \
2020
"stp x28, x29, [sp, #-16]!\n" \
21-
"str x30, [sp,#-16]!\n"
21+
"mrs x29, nzcv\n" \
22+
"stp x29, x30, [sp, #-16]!\n"
2223
// Mirrors SAVE_ALL
2324
#define RESTORE_ALL \
24-
"ldr x30, [sp], #16\n" \
25+
"ldp x29, x30, [sp], #16\n" \
26+
"msr nzcv, x29\n" \
2527
"ldp x28, x29, [sp], #16\n" \
2628
"ldp x26, x27, [sp], #16\n" \
2729
"ldp x24, x25, [sp], #16\n" \

bolt/test/AArch64/inline-bti.s

Lines changed: 0 additions & 39 deletions
This file was deleted.

0 commit comments

Comments
 (0)