Skip to content

Commit 6c915de

Browse files
1 parent 7ae1692 commit 6c915de

File tree

2 files changed

+3
-1
lines changed

2 files changed

+3
-1
lines changed

tensorflow/core/kernels/reduction_gpu_kernels.cu.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -725,7 +725,7 @@ inline bool isGfx10orGfx11(OpKernelContext* ctx) {
725725
result = hipGetDeviceProperties(&props, dev);
726726
if (result == hipSuccess) {
727727
std::string gcnArchName = props.gcnArchName;
728-
return (gcnArchName.substr(0,5)=="gfx10" || gcnArchName.substr(0,5)=="gfx11");
728+
return gcnArchName.substr(0,4)=="gfx1";
729729
}
730730
return false;
731731
}

tensorflow/core/util/gpu_launch_config.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -203,6 +203,8 @@ GpuLaunchConfig GetGpuLaunchConfigFixedBlockSize(
203203
#elif TENSORFLOW_USE_ROCM
204204
hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor(
205205
&block_count, func, fixed_block_size, dynamic_shared_memory_size);
206+
if (block_count < 1)
207+
block_count = 1;
206208
CHECK_EQ(err, hipSuccess);
207209
#endif
208210
block_count = std::min(block_count * d.getNumGpuMultiProcessors(),

0 commit comments

Comments
 (0)