Skip to content

Commit ebc87b7

Browse files
authored
reland offload use error (llvm#160811) (llvm#4278)
2 parents 8b3326f + 59fe478 commit ebc87b7

File tree

6 files changed

+180
-132
lines changed

6 files changed

+180
-132
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 35 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,11 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
623623
assert(MemoryManager && "Invalid memory manager");
624624
assert(PtrStorage && "Invalid pointer storage");
625625

626-
*PtrStorage = MemoryManager->allocate(Size, nullptr);
626+
auto PtrStorageOrErr = MemoryManager->allocate(Size, nullptr);
627+
if (!PtrStorageOrErr)
628+
return PtrStorageOrErr.takeError();
629+
630+
*PtrStorage = *PtrStorageOrErr;
627631
if (Size && *PtrStorage == nullptr)
628632
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
629633
"failure to allocate from AMDGPU memory manager");
@@ -643,15 +647,12 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
643647
private:
644648
/// Allocation callback that will be called once the memory manager does not
645649
/// have more previously allocated buffers.
646-
void *allocate(size_t Size, void *HstPtr, TargetAllocTy Kind) override;
650+
Expected<void *> allocate(size_t Size, void *HstPtr,
651+
TargetAllocTy Kind) override;
647652

648653
/// Deallocation callback that will be called by the memory manager.
649-
int free(void *TgtPtr, TargetAllocTy Kind) override {
650-
if (auto Err = MemoryPool->deallocate(TgtPtr)) {
651-
consumeError(std::move(Err));
652-
return OFFLOAD_FAIL;
653-
}
654-
return OFFLOAD_SUCCESS;
654+
Error free(void *TgtPtr, TargetAllocTy Kind) override {
655+
return MemoryPool->deallocate(TgtPtr);
655656
}
656657

657658
/// The underlying plugin that owns this memory manager.
@@ -3651,12 +3652,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
36513652
}
36523653

36533654
/// Allocate memory on the device or related to the device.
3654-
void *allocate(size_t Size, void *, TargetAllocTy Kind) override;
3655+
Expected<void *> allocate(size_t Size, void *, TargetAllocTy Kind) override;
36553656

36563657
/// Deallocate memory on the device or related to the device.
3657-
int free(void *TgtPtr, TargetAllocTy Kind) override {
3658+
Error free(void *TgtPtr, TargetAllocTy Kind) override {
36583659
if (TgtPtr == nullptr)
3659-
return OFFLOAD_SUCCESS;
3660+
return Plugin::success();
36603661

36613662
AMDGPUMemoryPoolTy *MemoryPool = nullptr;
36623663
switch (Kind) {
@@ -3672,17 +3673,14 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
36723673
break;
36733674
}
36743675

3675-
if (!MemoryPool) {
3676-
REPORT("No memory pool for the specified allocation kind\n");
3677-
return OFFLOAD_FAIL;
3678-
}
3676+
if (!MemoryPool)
3677+
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
3678+
"no memory pool for the specified allocation kind");
36793679

3680-
if (Error Err = MemoryPool->deallocate(TgtPtr)) {
3681-
REPORT("%s\n", toString(std::move(Err)).data());
3682-
return OFFLOAD_FAIL;
3683-
}
3680+
if (auto Err = MemoryPool->deallocate(TgtPtr))
3681+
return Err;
36843682

3685-
return OFFLOAD_SUCCESS;
3683+
return Plugin::success();
36863684
}
36873685

36883686
/// Synchronize current thread with the pending operations on the async info.
@@ -5773,14 +5771,13 @@ static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) {
57735771
return Plugin::error(OffloadErrCode, ErrFmt, Args..., Desc);
57745772
}
57755773

5776-
void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
5777-
TargetAllocTy Kind) {
5774+
Expected<void *> AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
5775+
TargetAllocTy Kind) {
57785776
// Allocate memory from the pool.
57795777
void *Ptr = nullptr;
5780-
if (auto Err = MemoryPool->allocate(Size, &Ptr)) {
5781-
consumeError(std::move(Err));
5782-
return nullptr;
5783-
}
5778+
if (auto Err = MemoryPool->allocate(Size, &Ptr))
5779+
return std::move(Err);
5780+
57845781
assert(Ptr && "Invalid pointer");
57855782

57865783
// Get a list of agents that can access this memory pool.
@@ -5790,14 +5787,13 @@ void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
57905787
[&](hsa_agent_t Agent) { return MemoryPool->canAccess(Agent); });
57915788

57925789
// Allow all valid kernel agents to access the allocation.
5793-
if (auto Err = MemoryPool->enableAccess(Ptr, Size, Agents)) {
5794-
REPORT("%s\n", toString(std::move(Err)).data());
5795-
return nullptr;
5796-
}
5790+
if (auto Err = MemoryPool->enableAccess(Ptr, Size, Agents))
5791+
return std::move(Err);
57975792
return Ptr;
57985793
}
57995794

5800-
void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
5795+
Expected<void *> AMDGPUDeviceTy::allocate(size_t Size, void *,
5796+
TargetAllocTy Kind) {
58015797
if (Size == 0)
58025798
return nullptr;
58035799

@@ -5821,17 +5817,15 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
58215817
MemoryPool = CoarseGrainedMemoryPools[0];
58225818
}
58235819

5824-
if (!MemoryPool) {
5825-
REPORT("No memory pool for the specified allocation kind\n");
5826-
return nullptr;
5827-
}
5820+
if (!MemoryPool)
5821+
return Plugin::error(ErrorCode::UNSUPPORTED,
5822+
"no memory pool for the specified allocation kind");
58285823

58295824
// Allocate from the corresponding memory pool.
58305825
void *Alloc = nullptr;
5831-
if (Error Err = MemoryPool->allocate(Size, &Alloc)) {
5832-
REPORT("%s\n", toString(std::move(Err)).data());
5833-
return nullptr;
5834-
}
5826+
if (auto Err = MemoryPool->allocate(Size, &Alloc))
5827+
return std::move(Err);
5828+
58355829
if (MemoryPool == CoarseGrainedMemoryPools[0] && IsEquippedWithGFX90A &&
58365830
EnableGFX90ACoarseGrainUsmMaps) {
58375831
// Need to register in the coarse grain usm map table
@@ -5854,10 +5848,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
58545848
});
58555849

58565850
// Enable all valid kernel agents to access the buffer.
5857-
if (auto Err = MemoryPool->enableAccess(Alloc, Size, Agents)) {
5858-
REPORT("%s\n", toString(std::move(Err)).data());
5859-
return nullptr;
5860-
}
5851+
if (auto Err = MemoryPool->enableAccess(Alloc, Size, Agents))
5852+
return std::move(Err);
58615853
}
58625854

58635855
return Alloc;

offload/plugins-nextgen/common/include/MemoryManager.h

Lines changed: 46 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,24 @@
2525
#include "Shared/Utils.h"
2626
#include "omptarget.h"
2727

28+
#include "llvm/Support/Error.h"
29+
30+
namespace llvm {
31+
2832
/// Base class of per-device allocator.
2933
class DeviceAllocatorTy {
3034
public:
3135
virtual ~DeviceAllocatorTy() = default;
3236

3337
/// Allocate a memory of size \p Size . \p HstPtr is used to assist the
3438
/// allocation.
35-
virtual void *allocate(size_t Size, void *HstPtr,
36-
TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
39+
virtual Expected<void *>
40+
allocate(size_t Size, void *HstPtr,
41+
TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
3742

38-
virtual int free(void *TgtPtr, TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
43+
/// Delete the pointer \p TgtPtr on the device
44+
virtual Error free(void *TgtPtr,
45+
TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
3946
};
4047

4148
/// Class of memory manager. The memory manager is per-device by using
@@ -133,17 +140,17 @@ class MemoryManagerTy {
133140
size_t SizeThreshold = 1U << 13;
134141

135142
/// Request memory from target device
136-
void *allocateOnDevice(size_t Size, void *HstPtr) const {
143+
Expected<void *> allocateOnDevice(size_t Size, void *HstPtr) const {
137144
return DeviceAllocator.allocate(Size, HstPtr, TARGET_ALLOC_DEVICE);
138145
}
139146

140147
/// Deallocate data on device
141-
int deleteOnDevice(void *Ptr) const { return DeviceAllocator.free(Ptr); }
148+
Error deleteOnDevice(void *Ptr) const { return DeviceAllocator.free(Ptr); }
142149

143150
/// This function is called when it tries to allocate memory on device but the
144151
/// device returns out of memory. It will first free all memory in the
145152
/// FreeList and try to allocate again.
146-
void *freeAndAllocate(size_t Size, void *HstPtr) {
153+
Expected<void *> freeAndAllocate(size_t Size, void *HstPtr) {
147154
std::vector<void *> RemoveList;
148155

149156
// Deallocate all memory in FreeList
@@ -153,7 +160,8 @@ class MemoryManagerTy {
153160
if (List.empty())
154161
continue;
155162
for (const NodeTy &N : List) {
156-
deleteOnDevice(N.Ptr);
163+
if (auto Err = deleteOnDevice(N.Ptr))
164+
return Err;
157165
RemoveList.push_back(N.Ptr);
158166
}
159167
FreeLists[I].clear();
@@ -174,14 +182,22 @@ class MemoryManagerTy {
174182
/// allocate directly on the device. If a \p nullptr is returned, it might
175183
/// be because the device is OOM. In that case, it will free all unused
176184
/// memory and then try again.
177-
void *allocateOrFreeAndAllocateOnDevice(size_t Size, void *HstPtr) {
178-
void *TgtPtr = allocateOnDevice(Size, HstPtr);
185+
Expected<void *> allocateOrFreeAndAllocateOnDevice(size_t Size,
186+
void *HstPtr) {
187+
auto TgtPtrOrErr = allocateOnDevice(Size, HstPtr);
188+
if (!TgtPtrOrErr)
189+
return TgtPtrOrErr.takeError();
190+
191+
void *TgtPtr = *TgtPtrOrErr;
179192
// We cannot get memory from the device. It might be due to OOM. Let's
180193
// free all memory in FreeLists and try again.
181194
if (TgtPtr == nullptr) {
182195
DP("Failed to get memory on device. Free all memory in FreeLists and "
183196
"try again.\n");
184-
TgtPtr = freeAndAllocate(Size, HstPtr);
197+
TgtPtrOrErr = freeAndAllocate(Size, HstPtr);
198+
if (!TgtPtrOrErr)
199+
return TgtPtrOrErr.takeError();
200+
TgtPtr = *TgtPtrOrErr;
185201
}
186202

187203
if (TgtPtr == nullptr)
@@ -203,16 +219,17 @@ class MemoryManagerTy {
203219

204220
/// Destructor
205221
~MemoryManagerTy() {
206-
for (auto Itr = PtrToNodeTable.begin(); Itr != PtrToNodeTable.end();
207-
++Itr) {
208-
assert(Itr->second.Ptr && "nullptr in map table");
209-
deleteOnDevice(Itr->second.Ptr);
222+
for (auto &PtrToNode : PtrToNodeTable) {
223+
assert(PtrToNode.second.Ptr && "nullptr in map table");
224+
if (auto Err = deleteOnDevice(PtrToNode.second.Ptr))
225+
REPORT("Failure to delete memory: %s\n",
226+
toString(std::move(Err)).data());
210227
}
211228
}
212229

213230
/// Allocate memory of size \p Size from target device. \p HstPtr is used to
214231
/// assist the allocation.
215-
void *allocate(size_t Size, void *HstPtr) {
232+
Expected<void *> allocate(size_t Size, void *HstPtr) {
216233
// If the size is zero, we will not bother the target device. Just return
217234
// nullptr directly.
218235
if (Size == 0)
@@ -227,11 +244,14 @@ class MemoryManagerTy {
227244
DP("%zu is greater than the threshold %zu. Allocate it directly from "
228245
"device\n",
229246
Size, SizeThreshold);
230-
void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
247+
auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
248+
if (!TgtPtrOrErr)
249+
return TgtPtrOrErr.takeError();
231250

232-
DP("Got target pointer " DPxMOD ". Return directly.\n", DPxPTR(TgtPtr));
251+
DP("Got target pointer " DPxMOD ". Return directly.\n",
252+
DPxPTR(*TgtPtrOrErr));
233253

234-
return TgtPtr;
254+
return *TgtPtrOrErr;
235255
}
236256

237257
NodeTy *NodePtr = nullptr;
@@ -259,8 +279,11 @@ class MemoryManagerTy {
259279
if (NodePtr == nullptr) {
260280
DP("Cannot find a node in the FreeLists. Allocate on device.\n");
261281
// Allocate one on device
262-
void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
282+
auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
283+
if (!TgtPtrOrErr)
284+
return TgtPtrOrErr.takeError();
263285

286+
void *TgtPtr = *TgtPtrOrErr;
264287
if (TgtPtr == nullptr)
265288
return nullptr;
266289

@@ -281,7 +304,7 @@ class MemoryManagerTy {
281304
}
282305

283306
/// Deallocate memory pointed by \p TgtPtr
284-
int free(void *TgtPtr) {
307+
Error free(void *TgtPtr) {
285308
DP("MemoryManagerTy::free: target memory " DPxMOD ".\n", DPxPTR(TgtPtr));
286309

287310
NodeTy *P = nullptr;
@@ -313,7 +336,7 @@ class MemoryManagerTy {
313336
FreeLists[B].insert(*P);
314337
}
315338

316-
return OFFLOAD_SUCCESS;
339+
return Error::success();
317340
}
318341

319342
/// Get the size threshold from the environment variable
@@ -343,4 +366,6 @@ class MemoryManagerTy {
343366
constexpr const size_t MemoryManagerTy::BucketSize[];
344367
constexpr const int MemoryManagerTy::NumBuckets;
345368

369+
} // namespace llvm
370+
346371
#endif // LLVM_OPENMP_LIBOMPTARGET_PLUGINS_COMMON_MEMORYMANAGER_H

0 commit comments

Comments
 (0)