diff --git a/api/cl_khr_cooperative_matrix.asciidoc b/api/cl_khr_cooperative_matrix.asciidoc new file mode 100644 index 00000000..9532b998 --- /dev/null +++ b/api/cl_khr_cooperative_matrix.asciidoc @@ -0,0 +1,32 @@ +// Copyright 2023-2026 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_cooperative_matrix.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2026-01-23 +*IP Status*:: + No known IP claims. +*Contributors*:: + * Kévin Petit, Arm Ltd. + * Ben Ashbaugh, Intel + * Sven van Haastregt, Arm Ltd. + * Jose Lopez, Qualcomm + * Nico Reissmann, Arm Ltd. + * Martin Fredin, Arm Ltd. + +=== Description + +{cl_khr_cooperative_matrix_EXT} adds support for matrix multiplication +operations in which multiple work-items cooperate. + +This extension depends on `SPV_KHR_cooperative_matrix`. + +include::{generated}/meta/interfaces/cl_khr_cooperative_matrix.txt[] + +=== Version History + + * Revision 1.0.0, 2026-01-23 + ** First assigned version. diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 21286361..97cbcf8e 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -2212,6 +2212,21 @@ include::{generated}/api/version-notes/CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_ | Returns the version of the C++ for OpenCL language supported by the device compiler. endif::cl_ext_cxx_for_opencl[] + +ifdef::cl_khr_cooperative_matrix[] +| {CL_DEVICE_COOPERATIVE_MATRIX_POINTER_ALIGNMENT_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_COOPERATIVE_MATRIX_POINTER_ALIGNMENT_KHR.asciidoc[] + | {cl_uint_TYPE} + | Returns the minimum pointer alignment in bytes required for cooperative matrix load/store operations. 
+ +| {CL_DEVICE_COOPERATIVE_MATRIX_STRIDE_MULTIPLE_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_COOPERATIVE_MATRIX_STRIDE_MULTIPLE_KHR.asciidoc[] + | {cl_uint_TYPE} + | Returns the size in bytes that strides used in cooperative matrix load/store operations must be a multiple of. +endif::cl_khr_cooperative_matrix[] + |==== ifdef::cl_khr_integer_dot_product[] @@ -2342,6 +2357,136 @@ include::{generated}/api/structs/cl_device_pci_bus_info_khr.txt[] -- endif::cl_khr_pci_bus_info[] +ifdef::cl_khr_cooperative_matrix[] +[open,refpage='clGetDeviceCooperativeMatrixInfoKHR',desc='Returns information about cooperative matrix for a device.',type='protos'] +-- +To return information about cooperative matrix for a device, call the function + +include::{generated}/api/protos/clGetDeviceCooperativeMatrixInfoKHR.txt[] +include::{generated}/api/version-notes/clGetDeviceCooperativeMatrixInfoKHR.asciidoc[] + + * _device_ may be a device returned by {clGetDeviceIDs} or a sub-device + created by {clCreateSubDevices}. If _device_ is a sub-device, the specific + information for the sub-device will be returned. + * _param_name_ specifies the information to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetDeviceCooperativeMatrixInfoKHR} is described in the + <<device-cooperative-matrix-info-table,Device Cooperative Matrix Info>> table. + * _input_value_size_ is used to specify the size in bytes of memory pointed to + by _input_value_. + This size must be equal to the size of the input type as described in the table below. + * _input_value_ is a pointer to memory where the appropriate parameterization + of the query is passed from. + If _input_value_ is `NULL`, it is ignored. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + specified in the <<device-cooperative-matrix-info-table,Device Cooperative Matrix Info>> table. + If _param_value_ is `NULL`, it is ignored. 
+ * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +[[device-cooperative-matrix-info-table]] +.List of supported param_names by {clGetDeviceCooperativeMatrixInfoKHR} +[width="100%",cols="<25%,<25%,<25%,<25%",options="header"] +|==== +| Device Cooperative Matrix Info | Input Type | Return Type | Description + +| {CL_DEVICE_COOPERATIVE_MATRIX_DEFAULT_SUB_GROUP_VARIANTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_COOPERATIVE_MATRIX_DEFAULT_SUB_GROUP_VARIANTS_KHR.asciidoc[] + | - + | `cl_device_cooperative_matrix_variant_khr[]` + | Describes the list of cooperative matrix operation sub-group scope variants + supported by the device for the default sub-group size. + +| {CL_DEVICE_COOPERATIVE_MATRIX_SUB_GROUP_VARIANTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_COOPERATIVE_MATRIX_SUB_GROUP_VARIANTS_KHR.asciidoc[] + | {cl_uint_TYPE} + | `cl_device_cooperative_matrix_variant_khr[]` + | Describes the list of cooperative matrix operation sub-group scope variants + supported by the device for the sub-group size provided via _input_value_. +|==== + +// refError + +{clGetDeviceCooperativeMatrixInfoKHR} returns {CL_SUCCESS} if the function is +executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_DEVICE} + ** if _device_ is not a valid device + * {CL_INVALID_OPERATION} + ** if _device_ does not support the `cl_khr_cooperative_matrix` extension. + * {CL_INVALID_VALUE} + ** if _param_name_ is not one of the supported values + ** if the size in bytes specified by _param_value_size_ is less than the size of + the return type specified in the + <<device-cooperative-matrix-info-table,Device Cooperative Matrix Info>> table + and _param_value_ is not `NULL`. + ** if _param_name_ requires an _input_value_ and _input_value_ is `NULL`. 
+ ** if _param_name_ requires an _input_value_ and the size in bytes specified by + _input_value_size_ does not match the size of the input type specified in the + <<device-cooperative-matrix-info-table,Device Cooperative Matrix Info>> table. + * {CL_OUT_OF_RESOURCES} + ** if there is a failure to allocate resources required by the OpenCL + implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} + ** if there is a failure to allocate resources required by the OpenCL + implementation on the host. +-- + +[open,refpage='cl_device_cooperative_matrix_variant_khr',desc='Structure describing a supported cooperative matrix variant',type='structs'] +-- +[[cooperative-matrix-variant]] +Each {cl_device_cooperative_matrix_variant_khr_TYPE} structure describes a +single supported combination of matrix sizes, component types, and +saturation behavior supported by a device for cooperative matrix multiply/add +operations: + +include::{generated}/api/structs/cl_device_cooperative_matrix_variant_khr.txt[] + + * _m_size_ is the number of rows in matrices `A`, `C`, and `Result`. + * _n_size_ is the number of columns in matrices `B`, `C`, and `Result`. + * _k_size_ is the number of columns in matrix `A` and rows in matrix `B`. + * _a_type_ is the component type of matrix `A`, of type {cl_device_cooperative_matrix_component_type_khr_TYPE}. + * _b_type_ is the component type of matrix `B`, of type {cl_device_cooperative_matrix_component_type_khr_TYPE}. + * _c_type_ is the component type of matrix `C`, of type {cl_device_cooperative_matrix_component_type_khr_TYPE}. + * _result_type_ is the component type of matrix `Result`, of type {cl_device_cooperative_matrix_component_type_khr_TYPE}. + * _saturating_accumulation_ indicates whether the `SaturatingAccumulation` + operand to `OpCooperativeMatrixMulAddKHR` must be present. If it is + {CL_TRUE}, the `SaturatingAccumulation` operand must be present. If it + is {CL_FALSE}, the `SaturatingAccumulation` operand must not be present. 
+-- + +[open,refpage='cl_device_cooperative_matrix_component_type_khr',desc='Enumeration describing the component type of a cooperative matrix',type='defines'] +-- +The {cl_device_cooperative_matrix_component_type_khr_TYPE} enumeration +describes the component type of a cooperative matrix: + +include::{generated}/api/defines/cl_device_cooperative_matrix_component_type_khr.txt[] + + * {CL_DEVICE_COOPERATIVE_MATRIX_COMPONENT_TYPE_FP16_KHR_anchor} corresponds to SPIR-V `OpTypeFloat 16`. + * {CL_DEVICE_COOPERATIVE_MATRIX_COMPONENT_TYPE_FP32_KHR_anchor} corresponds to SPIR-V `OpTypeFloat 32`. + * {CL_DEVICE_COOPERATIVE_MATRIX_COMPONENT_TYPE_FP64_KHR_anchor} corresponds to SPIR-V `OpTypeFloat 64`. + * {CL_DEVICE_COOPERATIVE_MATRIX_COMPONENT_TYPE_SINT8_KHR_anchor} corresponds to SPIR-V `OpTypeInt 8 0`. + * {CL_DEVICE_COOPERATIVE_MATRIX_COMPONENT_TYPE_SINT16_KHR_anchor} corresponds to SPIR-V `OpTypeInt 16 0`. + * {CL_DEVICE_COOPERATIVE_MATRIX_COMPONENT_TYPE_SINT32_KHR_anchor} corresponds to SPIR-V `OpTypeInt 32 0`. + * {CL_DEVICE_COOPERATIVE_MATRIX_COMPONENT_TYPE_SINT64_KHR_anchor} corresponds to SPIR-V `OpTypeInt 64 0`. + * {CL_DEVICE_COOPERATIVE_MATRIX_COMPONENT_TYPE_UINT8_KHR_anchor} corresponds to SPIR-V `OpTypeInt 8 0`. + * {CL_DEVICE_COOPERATIVE_MATRIX_COMPONENT_TYPE_UINT16_KHR_anchor} corresponds to SPIR-V `OpTypeInt 16 0`. + * {CL_DEVICE_COOPERATIVE_MATRIX_COMPONENT_TYPE_UINT32_KHR_anchor} corresponds to SPIR-V `OpTypeInt 32 0`. + * {CL_DEVICE_COOPERATIVE_MATRIX_COMPONENT_TYPE_UINT64_KHR_anchor} corresponds to SPIR-V `OpTypeInt 64 0`. 
+-- + +endif::cl_khr_cooperative_matrix[] + + [open,refpage='clGetDeviceAndHostTimer',desc='Query synchronized host and device timestamps',type='protos'] -- To query device and host timestamps, call the function: diff --git a/env/extensions.asciidoc b/env/extensions.asciidoc index 2a769add..52be3fe0 100644 --- a/env/extensions.asciidoc +++ b/env/extensions.asciidoc @@ -412,6 +412,75 @@ If the OpenCL environment supports the extension `cl_khr_spirv_queries`, then: * For each of the capabilities returned by the query `CL_DEVICE_SPIRV_CAPABILITIES_KHR`, it is valid to declare the SPIR-V capability in a SPIR-V module using *OpCapability*. Some capabilities may additionally require a specific SPIR-V version, or a SPIR-V extension to be declared via *OpExtension*. +==== `cl_khr_cooperative_matrix` + +If the OpenCL environment supports the extension `cl_khr_cooperative_matrix`, then the environment must +accept modules that declare use of the extension `SPV_KHR_cooperative_matrix` via *OpExtension*. + +If the OpenCL environment supports the extension `cl_khr_cooperative_matrix` and use of the SPIR-V extension `SPV_KHR_cooperative_matrix` is declared in the module via *OpExtension*, then the environment must accept modules that declare the following SPIR-V capability: + +* *CooperativeMatrixKHR* + +For *OpTypeCooperativeMatrixKHR*, the component type, number of rows and number +of columns must match one of the supported combinations reported via the +{clGetDeviceCooperativeMatrixInfoKHR} device query, otherwise +the behavior of the program is undefined. + +For *OpCooperativeMatrixMulAddKHR*, the operands must match a supported +{cl_device_cooperative_matrix_variant_khr_TYPE} such that: + +* The type of _A_ must have _Rows_ match `cl_device_cooperative_matrix_variant_khr::m_size`, +_Columns_ match `cl_device_cooperative_matrix_variant_khr::k_size`, _Use_ be *MatrixAKHR*, +and _ComponentType_ match `cl_device_cooperative_matrix_variant_khr::a_type`. 
+ +* The type of _B_ must have _Rows_ match `cl_device_cooperative_matrix_variant_khr::k_size`, +_Columns_ match `cl_device_cooperative_matrix_variant_khr::n_size`, _Use_ be *MatrixBKHR*, +and _ComponentType_ match `cl_device_cooperative_matrix_variant_khr::b_type`. + +* The type of _C_ must have _Rows_ match `cl_device_cooperative_matrix_variant_khr::m_size`, +_Columns_ match `cl_device_cooperative_matrix_variant_khr::n_size`, _Use_ be *MatrixAccumulatorKHR*, +and _ComponentType_ match `cl_device_cooperative_matrix_variant_khr::c_type`. + +* The type of _Result_ must have _Rows_ match `cl_device_cooperative_matrix_variant_khr::m_size`, +_Columns_ match `cl_device_cooperative_matrix_variant_khr::n_size`, _Use_ be *MatrixAccumulatorKHR*, +and _ComponentType_ match `cl_device_cooperative_matrix_variant_khr::result_type`. + +* If and only if `cl_device_cooperative_matrix_variant_khr::a_type` is a signed +integer type, *MatrixASignedComponentsKHR* must be used. + +* If and only if `cl_device_cooperative_matrix_variant_khr::b_type` is a signed +integer type, *MatrixBSignedComponentsKHR* must be used. + +* If and only if `cl_device_cooperative_matrix_variant_khr::c_type` is a signed +integer type, *MatrixCSignedComponentsKHR* must be used. + +* If and only if `cl_device_cooperative_matrix_variant_khr::result_type` is a signed +integer type, *MatrixResultSignedComponentsKHR* must be used. + +* If and only if `cl_device_cooperative_matrix_variant_khr::saturating_accumulation` +is `CL_TRUE`, *SaturatingAccumulationKHR* must be used. + +* If and only if `cl_device_cooperative_matrix_variant_khr::saturating_accumulation` +is `CL_FALSE`, *SaturatingAccumulationKHR* must not be used. + +* The scope of all cooperative matrix operands must be *Subgroup*. 
+ +* Behavior is undefined if *OpCooperativeMatrixLoadKHR*, +*OpCooperativeMatrixStoreKHR*, or *OpCooperativeMatrixMulAddKHR* acting on +cooperative matrix objects with *Subgroup* scope are dynamically executed +by a kernel that does not use full subgroups. + +For *OpCooperativeMatrixLoadKHR* or *OpCooperativeMatrixStoreKHR*: + +* the _Storage Class_ of the _Pointer_ operand must be limited to *Workgroup* or + *CrossWorkgroup*. +* _Pointer_ must be aligned to at least the larger of the size of the elements + of the matrix or {CL_DEVICE_COOPERATIVE_MATRIX_POINTER_ALIGNMENT_KHR} for the + device. +* _Stride_ must be a multiple of the larger of the size of the elements of the + matrix or {CL_DEVICE_COOPERATIVE_MATRIX_STRIDE_MULTIPLE_KHR}, divided by the + size of the type pointed to by _Pointer_. + === Embedded Profile Extensions ==== `cles_khr_int64` diff --git a/scripts/gen_extension_interfaces.py b/scripts/gen_extension_interfaces.py index 82c1d376..1da07ae7 100644 --- a/scripts/gen_extension_interfaces.py +++ b/scripts/gen_extension_interfaces.py @@ -60,6 +60,7 @@ def GetHeader(): iface_file.write('=== New Types\n\n') for ty in types: iface_file.write(' * {{{}_TYPE}}\n'.format(ty.get('name'))) + iface_file.write('\n') # New enums first_enum = True diff --git a/xml/cl.xml b/xml/cl.xml index 786ccf14..3ba472f6 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -266,6 +266,8 @@ server's OpenCL/api-docs repository. typedef cl_properties cl_svm_free_properties_khr; typedef cl_bitfield cl_svm_free_flags_khr; typedef cl_uint cl_svm_pointer_info_khr; + typedef cl_uint cl_device_cooperative_matrix_component_type_khr; + typedef cl_uint cl_device_cooperative_matrix_info_khr; Structure types @@ -471,6 +473,16 @@ server's OpenCL/api-docs repository. 
CL_SVM_CAPABILITY_CONCURRENT_ATOMIC_ACCESS_KHR | \ CL_SVM_CAPABILITY_INDIRECT_ACCESS_KHR) + + cl_uint m_size + cl_uint n_size + cl_uint k_size + cl_device_cooperative_matrix_component_type_khr a_type + cl_device_cooperative_matrix_component_type_khr b_type + cl_device_cooperative_matrix_component_type_khr c_type + cl_device_cooperative_matrix_component_type_khr result_type + cl_bool saturating_accumulation + @@ -1525,6 +1537,19 @@ server's OpenCL/api-docs repository. + + + + + + + + + + + + + @@ -1693,7 +1718,11 @@ server's OpenCL/api-docs repository. - + + + + + @@ -4609,6 +4638,16 @@ server's OpenCL/api-docs repository. cl_context context cl_perf_hint_qcom perf_hint + + cl_int clGetDeviceCooperativeMatrixInfoKHR + cl_device_id device + cl_device_cooperative_matrix_info_khr param_name + size_t input_value_size + const void* input_value + size_t param_value_size + void* param_value + size_t* param_value_size_ret + @@ -8053,5 +8092,36 @@ server's OpenCL/api-docs repository. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +