Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,15 @@
} MKLVersion;


/* Apple Accelerate doesn't allow setting the number of threads directly, it only has an
* option to do single-threaded or multi-threaded. That is controlled via the BLASSetThreading
* API introduced in macOS 15.
*
* These constants are from the vecLib.framework/Headers/thread_api.h file
*/
#define ACCELERATE_BLAS_THREADING_MULTI_THREADED 0
#define ACCELERATE_BLAS_THREADING_SINGLE_THREADED 1

/*
* We provide a flexible thread getter/setter interface here; by calling `lbt_set_num_threads()`
* libblastrampoline will propagate the call through to its loaded libraries as long as the
Expand All @@ -50,6 +59,7 @@ static char * getter_names[MAX_THREADING_NAMES] = {
"nvpl_lapack_get_max_threads",
// We special-case MKL in the lookup loop below
//"MKL_Domain_Get_Max_Threads",
// We special-case Apple Accelerate below
NULL
};

Expand All @@ -60,6 +70,7 @@ static char * setter_names[MAX_THREADING_NAMES] = {
"nvpl_lapack_set_num_threads",
// We special-case MKL in the lookup loop below
//"MKL_Domain_Set_Num_Threads",
// We special-case Apple Accelerate below
NULL
};

Expand Down Expand Up @@ -129,6 +140,37 @@ LBT_DLLEXPORT int32_t lbt_get_num_threads() {
}
}
}

// Special case Apple Accelerate: it has no "number of threads" getter, only a query for
// whether BLAS is running single-threaded or multi-threaded (`BLASGetThreading`).
// This API only exists on macOS 15+, so the symbol lookup doubles as the availability check.
int (*fptr_acc)(void) = lookup_symbol(lib->handle, "BLASGetThreading");
if (fptr_acc != NULL) {
    // This is a threading *mode* (one of the ACCELERATE_BLAS_THREADING_* constants),
    // not a thread count.
    int threading_mode = fptr_acc();

    if (threading_mode == ACCELERATE_BLAS_THREADING_MULTI_THREADED) {
        int (*fptr_acc_nthreads)(void) = lookup_symbol(lib->handle, "APPLE_NTHREADS");
        // Check the pointer we are about to call; checking `fptr_acc` here would always
        // succeed and we would jump through NULL whenever `APPLE_NTHREADS` is absent.
        if (fptr_acc_nthreads != NULL) {
            // In Accelerate, there is a symbol called APPLE_NTHREADS, which appears to be a function
            // we can call to get an integer saying the number of CPU threads. There is no
            // documentation for this anywhere accessible online, but testing on two different CPUs
            // suggests it is the number of CPU cores.
            //
            // Doing this:
            //    julia> @ccall AppleAccelerate.libacc.APPLE_NTHREADS()::Int
            //
            // The M2 Max returned 12, M4 Max returned 16, which is the total number of cores
            // (both big and little) in each processor.
            int acc_nthreads = fptr_acc_nthreads();
            max_threads = max(max_threads, acc_nthreads);
        } else {
            // This number is arbitrary because we have no idea how many threads are actually
            // in use, but it is greater than 1 to mean multi-threaded.
            max_threads = max(max_threads, 2);
        }
    } else {
        // Single-threaded
        max_threads = max(max_threads, 1);
    }
}
}
return max_threads;
}
Expand Down Expand Up @@ -157,5 +199,16 @@ LBT_DLLEXPORT void lbt_set_num_threads(int32_t nthreads) {
fptr(nthreads, MKL_DOMAIN_BLAS);
fptr(nthreads, MKL_DOMAIN_LAPACK);
}

// Apple Accelerate cannot be told an exact thread count; `BLASSetThreading` only switches
// between single-threaded and multi-threaded operation, so map the request onto that choice.
// The symbol is only present on macOS 15+, hence the NULL check.
int (*fptr_acc)(int) = lookup_symbol(lib->handle, "BLASSetThreading");
if (fptr_acc != NULL) {
    // Any request for more than one thread selects multi-threaded mode.
    const int threading_mode = (nthreads > 1)
        ? ACCELERATE_BLAS_THREADING_MULTI_THREADED
        : ACCELERATE_BLAS_THREADING_SINGLE_THREADED;
    fptr_acc(threading_mode);
}
}
}
93 changes: 93 additions & 0 deletions test/accelerate.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
using Libdl, Test

# Taken from AppleAccelerate.jl to avoid a dependency on it
const libacc = "/System/Library/Frameworks/Accelerate.framework/Accelerate"
const libacc_info_plist = "/System/Library/Frameworks/Accelerate.framework/Versions/Current/Resources/Info.plist"

"""
    get_macos_version(normalize=true)

Return the macOS product version read from
`/System/Library/CoreServices/SystemVersion.plist` as a `VersionNumber`.

Returns `nothing` when not running on Apple, when the plist file does not exist, or when no
well-formed `ProductVersion` entry (one to three dot-separated numbers) is found in it.

When `normalize` is true, a reported major version of 16 is rewritten to 26, keeping the
minor and patch components (presumably both numbers can refer to the same OS release;
confirm against the upstream AppleAccelerate.jl helper).
"""
function get_macos_version(normalize=true)
    @static if !Sys.isapple()
        return nothing
    end

    plist_path = "/System/Library/CoreServices/SystemVersion.plist"
    # Every other failure mode yields `nothing`; do the same for a missing file.
    isfile(plist_path) || return nothing

    # Match the key together with its value across the whole file rather than assuming the
    # value sits on the line after the key. This cannot index past the last line, and it still
    # works if key and value share a line. The capture is restricted to `N`, `N.N` or `N.N.N`
    # so that `VersionNumber` cannot throw on it.
    m = match(
        r"<key>ProductVersion</key>\s*<string>(\d+(?:\.\d+){0,2})</string>",
        read(plist_path, String),
    )
    if m === nothing
        return nothing
    end

    ver = VersionNumber(only(m.captures))
    if normalize && ver.major == 16
        return VersionNumber(26, ver.minor, ver.patch)
    end
    return ver
end


# Open Accelerate directly so forwarded addresses can be compared against its own symbols
libacc_handle = dlopen(libacc)
@testset "Accelerate ILP64 loading" begin
    # The ILP64 symbols are only available from macOS 13.3 onwards; older systems run no checks.
    # NOTE: `get_macos_version()` can return `nothing`, in which case this comparison throws
    # and the testset reports an error.
    has_ilp64 = get_macos_version() >= v"13.3"
    if has_ilp64
        # Replace whatever is currently loaded with Accelerate's ILP64 interface
        lbt_forward(lbt_handle, libacc; clear=true, suffix_hint="\x1a\$NEWLAPACK\$ILP64")

        # Exactly one library must be loaded afterwards
        lbt_config = lbt_get_config(lbt_handle)
        loaded = unpack_loaded_libraries(lbt_config)
        @test length(loaded) == 1

        # ... and it must be Accelerate, detected as ILP64
        @test loaded[1].libname == libacc
        @test loaded[1].interface == LBT_INTERFACE_ILP64

        # The ILP64 forward for `dgemm_` must be Accelerate's suffixed `dgemm` symbol
        expected_dgemm = dlsym(libacc_handle, "dgemm\$NEWLAPACK\$ILP64")
        @test lbt_get_forward(lbt_handle, "dgemm_", LBT_INTERFACE_ILP64) == expected_dgemm
    end
end

@testset "Accelerate LP64 loading" begin
    # The `$NEWLAPACK` symbols are only available from macOS 13.3 onwards; older systems run
    # no checks.
    # NOTE: `get_macos_version()` can return `nothing`, in which case this comparison throws
    # and the testset reports an error.
    has_new_lapack = get_macos_version() >= v"13.3"
    if has_new_lapack
        # Replace whatever is currently loaded with Accelerate's LP64 interface
        lbt_forward(lbt_handle, libacc; clear=true, suffix_hint="\x1a\$NEWLAPACK")

        # Exactly one library must be loaded afterwards
        lbt_config = lbt_get_config(lbt_handle)
        loaded = unpack_loaded_libraries(lbt_config)
        @test length(loaded) == 1

        # ... and it must be Accelerate, detected as LP64
        @test loaded[1].libname == libacc
        @test loaded[1].interface == LBT_INTERFACE_LP64

        # The LP64 forward for `dgemm_` must be Accelerate's suffixed `dgemm` symbol
        expected_dgemm = dlsym(libacc_handle, "dgemm\$NEWLAPACK")
        @test lbt_get_forward(lbt_handle, "dgemm_", LBT_INTERFACE_LP64) == expected_dgemm
    end
end

@testset "Accelerate threading" begin
    # The threading API exercised here is only expected to work on macOS 15 and newer.
    # NOTE: `get_macos_version()` can return `nothing`, in which case this comparison throws
    # and the testset reports an error.
    has_threading_api = get_macos_version() >= v"15"
    if has_threading_api
        lbt_forward(lbt_handle, libacc; clear=true)

        # Each entry is (threads requested, predicate the reported thread count must satisfy).
        # Accelerate only distinguishes single- from multi-threaded, so a multi-threaded
        # request can only be checked for "greater than one", not for an exact count.
        # The sequence is: single-threaded, multi-threaded, back to single-threaded.
        scenarios = (
            (1, n -> n == 1),
            (2, n -> n > 1),
            (1, n -> n == 1),
        )
        for (requested, acceptable) in scenarios
            lbt_set_num_threads(lbt_handle, requested)
            @test acceptable(lbt_get_num_threads(lbt_handle))
        end
    end
end
5 changes: 5 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,8 @@ end

# Run our "direct" tests within Julia
include("direct.jl")

# The Accelerate tests load the framework from its macOS system path, so only pull that
# file in when running on Apple. `@static` resolves the platform check at macro-expansion time.
@static Sys.isapple() && include("accelerate.jl")