-
Notifications
You must be signed in to change notification settings - Fork 15.2k
AMDGPU: Account for read/write register intrinsics for AGPR usage #161988
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/arsenm/amdgpu/parse-num-regs-asm-attributor
Are you sure you want to change the base?
Conversation
Fix the special case intrinsics that can directly reference a physical register. There's no reason to use this.
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesFix the special case intrinsics that can directly reference a physical Full diff: https://github.com/llvm/llvm-project/pull/161988.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 155b1a6a380dd..e8e39ff015eed 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1313,10 +1313,21 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
return false;
}
- // Some intrinsics may use AGPRs, but if we have a choice, we are not
- // required to use AGPRs.
- if (Callee->isIntrinsic())
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::write_register:
+ case Intrinsic::read_register:
+ case Intrinsic::read_volatile_register: {
+ const MDString *RegName = cast<MDString>(
+ cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata());
+ auto [Kind, RegIdx, NumRegs] =
+ AMDGPU::parseAsmPhysRegName(RegName->getString());
+ return Kind != 'a';
+ }
+ default:
+ // Some intrinsics may use AGPRs, but if we have a choice, we are not
+ // required to use AGPRs.
return true;
+ }
// TODO: Handle callsite attributes
const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 20fa1412a778e..8e57498c56f76 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1577,12 +1577,7 @@ static bool isValidRegPrefix(char C) {
return C == 'v' || C == 's' || C == 'a';
}
-std::tuple<char, unsigned, unsigned>
-parseAsmConstraintPhysReg(StringRef Constraint) {
- StringRef RegName = Constraint;
- if (!RegName.consume_front("{") || !RegName.consume_back("}"))
- return {};
-
+std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
char Kind = RegName.front();
if (!isValidRegPrefix(Kind))
return {};
@@ -1609,6 +1604,14 @@ parseAsmConstraintPhysReg(StringRef Constraint) {
return {};
}
+std::tuple<char, unsigned, unsigned>
+parseAsmConstraintPhysReg(StringRef Constraint) {
+ StringRef RegName = Constraint;
+ if (!RegName.consume_front("{") || !RegName.consume_back("}"))
+ return {};
+ return parseAsmPhysRegName(RegName);
+}
+
std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
std::pair<unsigned, unsigned> Default,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 2b9c063f42a5e..b656414f2b85c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1013,6 +1013,13 @@ bool isReadOnlySegment(const GlobalValue *GV);
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);
+/// Returns a valid charcode or 0 in the first entry if this is a valid physical
+/// register name. Followed by the start register number, and the register
+/// width. Does not validate the number of registers exists in the class. Unlike
+/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in
+/// "{}".
+std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);
+
/// Returns a valid charcode or 0 in the first entry if this is a valid physical
/// register constraint. Followed by the start register number, and the register
/// width. Does not validate the number of registers exists in the class.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index f19d563067eb2..7fcd07486d54d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -168,7 +168,7 @@ declare void @unknown()
define amdgpu_kernel void @kernel_calls_extern() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -180,8 +180,8 @@ define amdgpu_kernel void @kernel_calls_extern() {
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]]
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
@@ -205,7 +205,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR5]]
+; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
@@ -216,7 +216,7 @@ define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect)
define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -260,7 +260,7 @@ define amdgpu_kernel void @kernel_calls_empty() {
define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @empty()
; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
; CHECK-NEXT: call void @use_most()
@@ -616,12 +616,93 @@ define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
ret void
}
+define amdgpu_kernel void @kernel_uses_write_register_a55() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a55", i32 0)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.write_register.i64(metadata !"a55", i32 0)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_v55() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55(
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"v55", i32 0)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.write_register.i64(metadata !"v55", i32 0)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT: call void @llvm.write_register.i96(metadata !"a[55:57]", i96 0)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.write_register.i64(metadata !"a[55:57]", i96 0)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata !"a55")
+; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+ %reg = call i32 @llvm.read_register.i64(metadata !"a55")
+ store i32 %reg, ptr addrspace(1) %ptr
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata !"a55")
+; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+ %reg = call i32 @llvm.read_volatile_register.i64(metadata !"a55")
+ store i32 %reg, ptr addrspace(1) %ptr
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata !"a[56:59]")
+; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT: ret void
+;
+ %reg = call i128 @llvm.read_register.i64(metadata !"a[56:59]")
+ store i128 %reg, ptr addrspace(1) %ptr
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a256", i32 0)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.write_register.i64(metadata !"a256", i32 0)
+ ret void
+}
+
attributes #0 = { "amdgpu-agpr-alloc"="0" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" }
;.
|
Fix the special case intrinsics that can directly reference a physical
register. There's no reason to use this.