Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 11 additions & 14 deletions apps/iir_blur/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,15 @@ target_link_libraries(iir_blur_filter PRIVATE
# Test that the app actually works!
#
# The test is registered for every Halide target. Backends that cannot run the
# Mullapudi2016 experimental GPU schedule are skipped at run time: the filter
# executable reads HL_TARGET and prints "[SKIP]", which CTest reports as a
# skipped test through SKIP_REGULAR_EXPRESSION. Registering the test once
# (instead of once per configure-time branch) also avoids a duplicate
# add_test() NAME error.
set(IMAGE ${CMAKE_CURRENT_LIST_DIR}/../images/rgb.png)
if (EXISTS "${IMAGE}")
    # Copy the input image into the build tree so the test can refer to it by
    # a relative name.
    configure_file("${IMAGE}" rgb.png COPYONLY)
    add_test(NAME iir_blur_filter
             COMMAND iir_blur_filter rgb.png out.png)
    # Pass in the keyword "metal" etc. via HL_TARGET to skip the test
    # explicitly. Buildbot can print a nice test report for all skipped tests.
    set_tests_properties(iir_blur_filter PROPERTIES
                         LABELS iir_blur
                         PASS_REGULAR_EXPRESSION "Success!"
                         SKIP_REGULAR_EXPRESSION "\\[SKIP\\]"
                         ENVIRONMENT "HL_TARGET=${Halide_TARGET}")
endif ()
43 changes: 43 additions & 0 deletions apps/iir_blur/filter.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <regex>

#include "HalideBuffer.h"
#include "HalideRuntime.h"
Expand All @@ -13,12 +14,54 @@

using namespace Halide::Tools;

namespace {

// Result of inspecting the HL_TARGET environment variable.
enum DeviceState {
    USING_METAL_OR_OPENCL,  // HL_TARGET mentions "metal" or "opencl".
    NOT_METAL_OR_OPENCL,    // HL_TARGET is set and mentions neither.
    ENV_VARIABLE_ABSENT,    // HL_TARGET is not set at all.
};

// Classifies the Halide target named by the HL_TARGET environment variable.
//
// Prints a warning when the variable is missing, and a "[SKIP]" line when the
// target string contains "metal" or "opencl". The caller decides what to do
// with the returned state; this function never aborts by itself.
DeviceState ensure_cuda_device() {
    const char *target_env = std::getenv("HL_TARGET");
    if (!target_env) {
        printf("Warning: Environment variable HL_TARGET not specified. "
               "Proceeding to the tests...\n");
        return ENV_VARIABLE_ABSENT;
    }

    // Substring match, so composite targets such as "host-metal" are caught.
    const std::regex unsupported_backend{"metal|opencl"};
    const bool must_skip = std::regex_search(target_env, unsupported_backend);
    if (!must_skip) {
        return NOT_METAL_OR_OPENCL;
    }

    // note(antonysigma): Error messages if we don't skip the test:
    //
    // OpenCL error: clFinish timeout.
    //
    // Metal: copy_to_host() failed. Error
    // Domain=MTLCommandBufferErrorDomain Code=2 "Caused GPU Timeout Error
    // (00000002:kIOAccelCommandBufferCallbackErrorTimeout)"
    // UserInfo={NSLocalizedDescription=Caused GPU Timeout Error
    // (00000002:kIOAccelCommandBufferCallbackErrorTimeout)}
    printf("[SKIP] Mullapudi2016 experimental GPU schedule "
           "generates copy_to_host() function calls that timeout. "
           "Target = %s. Skipping...\n",
           target_env);
    return USING_METAL_OR_OPENCL;
}

} // namespace

int main(int argc, char **argv) {
if (argc != 3) {
printf("Usage: %s in out\n", argv[0]);
return 1;
}

if (ensure_cuda_device() == USING_METAL_OR_OPENCL) {
return 0;
}

Halide::Runtime::Buffer<float, 3> input = load_and_convert_image(argv[1]);
Halide::Runtime::Buffer<float, 3> output(input.width(), input.height(), input.channels());

Expand Down
4 changes: 2 additions & 2 deletions apps/local_laplacian/process.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ DeviceState ensure_cuda_device() {
return ENV_VARIABLE_ABSENT;
}

if (std::regex_search(hl_target, std::regex{"cuda|metal|vulkan|opencl"})) {
if (std::regex_search(hl_target, std::regex{"metal|vulkan|opencl"})) {
// note(antonysigma): Error messages if we don't skip the test:
//
// OpenCL error: CL_INVALID_WORK_GROUP_SIZE clEnqueueNDRangeKernel
Expand All @@ -44,7 +44,7 @@ DeviceState ensure_cuda_device() {
// threadgroup size limit)'
//
// Vulkan: vkQueueWaitIdle returned VK_ERROR_DEVICE_LOST
printf("[SKIP] Mullapudi2016 experimental GPU schedules "
printf("[SKIP] Mullapudi2016 experimental GPU schedule "
"generates the gpu_threads where thread count per block "
"is not an multiple of 32. Target = %s. Skipping...\n",
hl_target);
Expand Down
23 changes: 22 additions & 1 deletion apps/unsharp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,33 @@ find_package(Halide REQUIRED)
# Generator
add_halide_generator(unsharp.generator SOURCES unsharp_generator.cpp)

# Parameters handed to the Mullapudi2016 autoscheduler. The experimental GPU
# schedule is always requested; GPU-specific tuning is appended below.
set(_unsharp_autoscheduler_params autoscheduler.experimental_gpu_schedule=1)

if(Halide_TARGET MATCHES "cuda|opencl|vulkan|metal")
    # Last level cache size estimate of the Nvidia GPU on the Buildbot. Hand
    # tuned to pass the Buildbot tests. The same value is used for Metal.
    list(APPEND _unsharp_autoscheduler_params
         autoscheduler.last_level_cache_size=20000)

    if(NOT Halide_TARGET MATCHES "cuda|opencl|vulkan")
        # Metal-only target. Resolves the Metal error regarding the threads
        # per GPU block limit:
        #
        # -[MTLDebugComputeCommandEncoder _validateThreadsPerThreadgroup:]:1267:
        # failed assertion `(threadsPerThreadgroup.width(70) *
        # threadsPerThreadgroup.height(8) * threadsPerThreadgroup.depth(1))(560)
        # must be <= 448. (kernel threadgroup size limit)`
        list(APPEND _unsharp_autoscheduler_params
             autoscheduler.parallelism=32)
    endif()
endif()

# Filters
add_halide_library(unsharp FROM unsharp.generator)

# Auto-scheduled variant of the same generator. The autoscheduler parameters
# come from _unsharp_autoscheduler_params, which is assembled per Halide target
# earlier in this file, so exactly one PARAMS clause is passed here. The
# variable is expanded unquoted on purpose: each list element must reach
# add_halide_library() as a separate argument.
add_halide_library(unsharp_auto_schedule FROM unsharp.generator
                   GENERATOR unsharp
                   AUTOSCHEDULER Halide::Mullapudi2016
                   PARAMS ${_unsharp_autoscheduler_params})

# Main executable
add_executable(unsharp_filter filter.cpp)
Expand Down
Loading