Skip to content
This repository was archived by the owner on Sep 15, 2025. It is now read-only.

Commit cf4271c

Browse files
committed
Promotion from dev branch inclusive to ded07b8
2 parents a22984c + ded07b8 commit cf4271c

14 files changed

+83
-84
lines changed

llvm/lib/CodeGen/PrologEpilogInserter.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -476,8 +476,8 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
476476
// Now that we know which registers need to be saved and restored, allocate
477477
// stack slots for them.
478478
for (auto &CS : CSI) {
479-
// If the target has spilled this register to another register, we don't
480-
// need to allocate a stack slot.
479+
// If the target has spilled this register to another register or already
480+
// handled it, we don't need to allocate a stack slot.
481481
if (CS.isSpilledToReg() || CS.isHandledByTarget())
482482
continue;
483483

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1246,15 +1246,15 @@ bool AMDGPUCallLowering::lowerTailCall(
12461246
// On GFX12, we can only change the VGPR allocation for wave32.
12471247
if (!ST.isWave32()) {
12481248
F.getContext().diagnose(DiagnosticInfoUnsupported(
1249-
F, "Dynamic VGPR mode is only supported for wave32\n"));
1249+
F, "dynamic VGPR mode is only supported for wave32"));
12501250
return false;
12511251
}
12521252

12531253
ArgInfo FallbackExecArg = Info.OrigArgs[ChainCallArgIdx::FallbackExec];
12541254
assert(FallbackExecArg.Regs.size() == 1 &&
12551255
"Expected single register for fallback EXEC");
12561256
if (!FallbackExecArg.Ty->isIntegerTy(ST.getWavefrontSize())) {
1257-
LLVM_DEBUG(dbgs() << "Bad type for fallback EXEC");
1257+
LLVM_DEBUG(dbgs() << "Bad type for fallback EXEC\n");
12581258
return false;
12591259
}
12601260
}

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5475,6 +5475,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
54755475
NODE_NAME_CASE(TC_RETURN)
54765476
NODE_NAME_CASE(TC_RETURN_GFX)
54775477
NODE_NAME_CASE(TC_RETURN_CHAIN)
5478+
NODE_NAME_CASE(TC_RETURN_CHAIN_DVGPR)
54785479
NODE_NAME_CASE(TRAP)
54795480
NODE_NAME_CASE(RET_GLUE)
54805481
NODE_NAME_CASE(WAVE_ADDRESS)

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,7 @@ enum NodeType : unsigned {
402402
TC_RETURN,
403403
TC_RETURN_GFX,
404404
TC_RETURN_CHAIN,
405+
TC_RETURN_CHAIN_DVGPR,
405406
TRAP,
406407

407408
// Masked control flow nodes.

llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,12 @@ def AMDGPUtc_return_chain: SDNode<"AMDGPUISD::TC_RETURN_CHAIN",
9999
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
100100
>;
101101

102+
// With dynamic VGPRs.
103+
def AMDGPUtc_return_chain_dvgpr: SDNode<"AMDGPUISD::TC_RETURN_CHAIN_DVGPR",
104+
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
105+
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
106+
>;
107+
102108
def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP",
103109
SDTypeProfile<0, 1, [SDTCisVT<0, i16>]>,
104110
[SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPOptInGlue]

llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -182,28 +182,26 @@ const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
182182
return AsmPrinter::lowerConstant(CV);
183183
}
184184

185-
static void emitVGPRBlockComment(const MachineInstr *MI, MCStreamer &OS) {
185+
static void emitVGPRBlockComment(const MachineInstr *MI, const SIInstrInfo *TII,
186+
const TargetRegisterInfo *TRI,
187+
const SIMachineFunctionInfo *MFI,
188+
MCStreamer &OS) {
186189
// The instruction will only transfer a subset of the registers in the block,
187190
// based on the mask that is stored in m0. We could search for the instruction
188191
// that sets m0, but most of the time we'll already have the mask stored in
189192
// the machine function info. Try to use that. This assumes that we only use
190193
// block loads/stores for CSR spills.
191-
const MachineFunction *MF = MI->getParent()->getParent();
192-
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
193-
const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
194-
const SIInstrInfo *TII = MF->getSubtarget<GCNSubtarget>().getInstrInfo();
195-
196194
Register RegBlock =
197195
TII->getNamedOperand(*MI, MI->mayLoad() ? AMDGPU::OpName::vdst
198196
: AMDGPU::OpName::vdata)
199197
->getReg();
200-
Register FirstRegInBlock = TRI.getSubReg(RegBlock, AMDGPU::sub0);
198+
Register FirstRegInBlock = TRI->getSubReg(RegBlock, AMDGPU::sub0);
201199
uint32_t Mask = MFI->getMaskForVGPRBlockOps(RegBlock);
202200

203201
SmallString<512> TransferredRegs;
204202
for (unsigned I = 0; I < 32; ++I) {
205203
if (Mask & (1 << I)) {
206-
(llvm::Twine(" ") + TRI.getName(FirstRegInBlock + I))
204+
(llvm::Twine(" ") + TRI->getRegAsmName(FirstRegInBlock + I))
207205
.toVector(TransferredRegs);
208206
}
209207
}
@@ -300,9 +298,11 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
300298
return;
301299
}
302300

303-
if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
304-
if (isVerbose())
305-
emitVGPRBlockComment(MI, *OutStreamer);
301+
if (isVerbose())
302+
if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
303+
emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
304+
MF->getInfo<SIMachineFunctionInfo>(),
305+
*OutStreamer);
306306

307307
MCInst TmpInst;
308308
MCInstLowering.lower(MI, TmpInst);

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1894,7 +1894,6 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
18941894

18951895
static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
18961896
const GCNSubtarget &ST,
1897-
const TargetRegisterInfo *TRI,
18981897
std::vector<CalleeSavedInfo> &CSI,
18991898
unsigned &MinCSFrameIndex,
19001899
unsigned &MaxCSFrameIndex) {
@@ -1921,8 +1920,8 @@ static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
19211920
if (!CanUseBlockOps(*CSIt))
19221921
continue;
19231922

1924-
// Find all the regs that will fit in a 32-bit block starting at the current
1925-
// reg and build the mask. It should have 1 for every register that's
1923+
// Find all the regs that will fit in a 32-bit mask starting at the current
1924+
// reg and build said mask. It should have 1 for every register that's
19261925
// included, with the current register as the least significant bit.
19271926
uint32_t Mask = 1;
19281927
CSEnd = std::remove_if(
@@ -1935,8 +1934,7 @@ static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
19351934
}
19361935
});
19371936

1938-
const TargetRegisterClass *BlockRegClass =
1939-
TII->getRegClassForBlockOp(TRI, MF);
1937+
const TargetRegisterClass *BlockRegClass = &AMDGPU::VReg_1024RegClass;
19401938
Register RegBlock =
19411939
MRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
19421940
if (!RegBlock) {
@@ -1990,8 +1988,7 @@ bool SIFrameLowering::assignCalleeSavedSpillSlots(
19901988
bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
19911989

19921990
if (UseVGPRBlocks)
1993-
assignSlotsUsingVGPRBlocks(MF, ST, TRI, CSI, MinCSFrameIndex,
1994-
MaxCSFrameIndex);
1991+
assignSlotsUsingVGPRBlocks(MF, ST, CSI, MinCSFrameIndex, MaxCSFrameIndex);
19951992

19961993
return assignCalleeSavedSpillSlots(MF, TRI, CSI);
19971994
}
@@ -2152,10 +2149,10 @@ bool SIFrameLowering::restoreCalleeSavedRegisters(
21522149
// VGPRs in the register block is reserved (e.g. if it's a WWM register),
21532150
// then the whole block will be marked as reserved and `updateLiveness` will
21542151
// skip it.
2155-
if (!MBB.isLiveIn(Reg))
2156-
MBB.addLiveIn(Reg);
2152+
MBB.addLiveIn(Reg);
21572153
}
21582154

2155+
MBB.sortUniqueLiveIns();
21592156
return false;
21602157
}
21612158

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3710,6 +3710,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
37103710
SDValue Callee = CLI.Callee;
37113711

37123712
llvm::SmallVector<SDValue, 6> ChainCallSpecialArgs;
3713+
bool UsesDynamicVGPRs = false;
37133714
if (IsChainCallConv) {
37143715
// The last arguments should be the value that we need to put in EXEC,
37153716
// followed by the flags and any other arguments with special meanings.
@@ -3758,6 +3759,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
37583759
return lowerUnhandledCall(CLI, InVals, "Expected 3 additional args");
37593760
}
37603761

3762+
UsesDynamicVGPRs = true;
37613763
std::for_each(CLI.Args.begin() + ChainCallArgIdx::NumVGPRs,
37623764
CLI.Args.end(), PushNodeOrTargetConstant);
37633765
}
@@ -4091,7 +4093,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
40914093
break;
40924094
case CallingConv::AMDGPU_CS_Chain:
40934095
case CallingConv::AMDGPU_CS_ChainPreserve:
4094-
OPC = AMDGPUISD::TC_RETURN_CHAIN;
4096+
OPC = UsesDynamicVGPRs ? AMDGPUISD::TC_RETURN_CHAIN_DVGPR
4097+
: AMDGPUISD::TC_RETURN_CHAIN;
40954098
break;
40964099
}
40974100

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5769,16 +5769,6 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID,
57695769
IsAllocatable);
57705770
}
57715771

5772-
const TargetRegisterClass *
5773-
SIInstrInfo::getRegClassForBlockOp(const TargetRegisterInfo *TRI,
5774-
const MachineFunction &MF) const {
5775-
const MCInstrDesc &ScratchStoreBlockOp =
5776-
get(AMDGPU::SCRATCH_STORE_BLOCK_SADDR);
5777-
int VDataIdx = AMDGPU::getNamedOperandIdx(ScratchStoreBlockOp.getOpcode(),
5778-
AMDGPU::OpName::vdata);
5779-
return getRegClass(ScratchStoreBlockOp, VDataIdx, TRI, MF);
5780-
}
5781-
57825772
const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
57835773
unsigned OpNo) const {
57845774
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,10 +1441,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
14411441
const MachineFunction &MF)
14421442
const override;
14431443

1444-
const TargetRegisterClass *
1445-
getRegClassForBlockOp(const TargetRegisterInfo *TRI,
1446-
const MachineFunction &MF) const;
1447-
14481444
void fixImplicitOperands(MachineInstr &MI) const;
14491445

14501446
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,

0 commit comments

Comments
 (0)