Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1313,10 +1313,21 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
return false;
}

// Some intrinsics may use AGPRs, but if we have a choice, we are not
// required to use AGPRs.
if (Callee->isIntrinsic())
switch (Callee->getIntrinsicID()) {
case Intrinsic::write_register:
case Intrinsic::read_register:
case Intrinsic::read_volatile_register: {
const MDString *RegName = cast<MDString>(
cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata());
auto [Kind, RegIdx, NumRegs] =
AMDGPU::parseAsmPhysRegName(RegName->getString());
return Kind != 'a';
}
default:
// Some intrinsics may use AGPRs, but if we have a choice, we are not
// required to use AGPRs.
return true;
}

// TODO: Handle callsite attributes
const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
Expand Down
15 changes: 9 additions & 6 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1577,12 +1577,7 @@ static bool isValidRegPrefix(char C) {
return C == 'v' || C == 's' || C == 'a';
}

std::tuple<char, unsigned, unsigned>
parseAsmConstraintPhysReg(StringRef Constraint) {
StringRef RegName = Constraint;
if (!RegName.consume_front("{") || !RegName.consume_back("}"))
return {};

std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
char Kind = RegName.front();
if (!isValidRegPrefix(Kind))
return {};
Expand All @@ -1609,6 +1604,14 @@ parseAsmConstraintPhysReg(StringRef Constraint) {
return {};
}

std::tuple<char, unsigned, unsigned>
parseAsmConstraintPhysReg(StringRef Constraint) {
StringRef RegName = Constraint;
if (!RegName.consume_front("{") || !RegName.consume_back("}"))
return {};
return parseAsmPhysRegName(RegName);
}

std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
std::pair<unsigned, unsigned> Default,
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1013,6 +1013,13 @@ bool isReadOnlySegment(const GlobalValue *GV);
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// Returns a valid charcode or 0 in the first entry if this is a valid physical
/// register name. Followed by the start register number, and the register
/// width. Does not validate the number of registers exists in the class. Unlike
/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in
/// "{}".
std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);

/// Returns a valid charcode or 0 in the first entry if this is a valid physical
/// register constraint. Followed by the start register number, and the register
/// width. Does not validate the number of registers exists in the class.
Expand Down
99 changes: 90 additions & 9 deletions llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ declare void @unknown()

define amdgpu_kernel void @kernel_calls_extern() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
Expand All @@ -180,8 +180,8 @@ define amdgpu_kernel void @kernel_calls_extern() {

define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]]
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
Expand All @@ -205,7 +205,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR5]]
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
Expand All @@ -216,7 +216,7 @@ define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect)

define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm(
; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
Expand Down Expand Up @@ -260,7 +260,7 @@ define amdgpu_kernel void @kernel_calls_empty() {

define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr(
; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @empty()
; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
; CHECK-NEXT: call void @use_most()
Expand Down Expand Up @@ -616,12 +616,93 @@ define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
ret void
}

define amdgpu_kernel void @kernel_uses_write_register_a55() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a55", i32 0)
; CHECK-NEXT: ret void
;
call void @llvm.write_register.i64(metadata !"a55", i32 0)
ret void
}

define amdgpu_kernel void @kernel_uses_write_register_v55() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55(
; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"v55", i32 0)
; CHECK-NEXT: ret void
;
call void @llvm.write_register.i64(metadata !"v55", i32 0)
ret void
}

define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
; CHECK-SAME: ) #[[ATTR3]] {
; CHECK-NEXT: call void @llvm.write_register.i96(metadata !"a[55:57]", i96 0)
; CHECK-NEXT: ret void
;
call void @llvm.write_register.i64(metadata !"a[55:57]", i96 0)
ret void
}

define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata !"a55")
; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: ret void
;
%reg = call i32 @llvm.read_register.i64(metadata !"a55")
store i32 %reg, ptr addrspace(1) %ptr
ret void
}

define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata !"a55")
; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: ret void
;
%reg = call i32 @llvm.read_volatile_register.i64(metadata !"a55")
store i32 %reg, ptr addrspace(1) %ptr
ret void
}

define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata !"a[56:59]")
; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: ret void
;
%reg = call i128 @llvm.read_register.i64(metadata !"a[56:59]")
store i128 %reg, ptr addrspace(1) %ptr
ret void
}

define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
; CHECK-SAME: ) #[[ATTR3]] {
; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a256", i32 0)
; CHECK-NEXT: ret void
;
call void @llvm.write_register.i64(metadata !"a256", i32 0)
ret void
}

attributes #0 = { "amdgpu-agpr-alloc"="0" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" }
; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" }
;.
Loading