Skip to content

Commit 1c6bdce

Browse files
authored
Merge pull request #45 from GPUOpen-LibrariesAndSDKs/next-release-8
HIPRT v3
2 parents a1fc965 + 96552af commit 1c6bdce

File tree

649 files changed

+174926
-3162
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

649 files changed

+174926
-3162
lines changed

.clang-format

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ BasedOnStyle: LLVM
22
IndentWidth: 4
33
TabWidth: 4
44
UseTab: Always
5-
BreakBeforeBraces: Custom
65
ColumnLimit: 128
76
AllowShortBlocksOnASingleLine: false
87
AllowShortIfStatementsOnASingleLine: true

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ hiprt/cache/Kernels.h
1515
hiprt/cache/KernelArgs.h
1616
PUBLIC_OUT/
1717
hiprt/impl/bvh_build_array.h
18+
scripts/bitcodes/__pycache__/

CMakeLists.txt

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
cmake_minimum_required(VERSION 3.10)
1+
cmake_minimum_required(VERSION 3.19) # at least 3.19 in order to have the compression-level in Zstd
22
project(hiprt)
33

44
#
@@ -15,6 +15,7 @@ option(HIPRTEW "Use hiprtew" OFF)
1515
option(NO_ENCRYPT "Don't encrypt kernel source and binaries" OFF)
1616
option(NO_UNITTEST "Don't build unit tests" OFF)
1717
option(HIPRT_PREFER_HIP_5 "Prefer HIP 5" OFF)
18+
option(COMPILED_COMPRESSION "enable compression of compiled kernels" ON) # this option is only used if BAKE_COMPILED_KERNEL is enabled -- it is advised to leave it 'ON', as that is the path tested by the HIPRT team.
1819

1920
option(FORCE_DISABLE_CUDA "By default Cuda support is automatically added if a Cuda install is detected. Turn this flag to ON to force Cuda to be disabled." OFF)
2021

@@ -388,6 +389,10 @@ set(KERNEL_HIPRT_COMP "${BASE_OUTPUT_DIR}/${CMAKE_BUILD_TYPE}/hiprt${version_
388389
set(KERNEL_UNITTEST_COMP "${BASE_OUTPUT_DIR}/${CMAKE_BUILD_TYPE}/hiprt${version_str_}_${HIP_VERSION_STR}_precompiled_bitcode_${KERNEL_OS_POSTFIX}.hipfb") # example: hiprt02005_6.2_precompiled_bitcode_win.hipfb
389390
set(KERNEL_OROCHI_COMP "${BASE_OUTPUT_DIR}/${CMAKE_BUILD_TYPE}/oro_compiled_kernels.hipfb")
390391

392+
# temp files: compiled kernel, compressed.
393+
set(KERNEL_HIPRT_COMP_COMPRESSED "${CMAKE_BINARY_DIR}/hiprt${version_str_}_${HIP_VERSION_STR}_amd.zstd" )
394+
set(KERNEL_OROCHI_COMP_COMPRESSED "${CMAKE_BINARY_DIR}/oro_compiled_kernels.zstd" )
395+
391396

392397
# precompile kernels:
393398
if(PRECOMPILE)
@@ -406,19 +411,16 @@ if(PRECOMPILE)
406411
${CMAKE_SOURCE_DIR}/hiprt/impl/AabbList.h
407412
${CMAKE_SOURCE_DIR}/hiprt/impl/BvhCommon.h
408413
${CMAKE_SOURCE_DIR}/hiprt/impl/BvhNode.h
409-
${CMAKE_SOURCE_DIR}/hiprt/impl/Geometry.h
410414
${CMAKE_SOURCE_DIR}/hiprt/impl/QrDecomposition.h
411415
${CMAKE_SOURCE_DIR}/hiprt/impl/Quaternion.h
412416
${CMAKE_SOURCE_DIR}/hiprt/impl/Transform.h
413417
${CMAKE_SOURCE_DIR}/hiprt/impl/Instance.h
414418
${CMAKE_SOURCE_DIR}/hiprt/impl/InstanceList.h
415419
${CMAKE_SOURCE_DIR}/hiprt/impl/MortonCode.h
416-
${CMAKE_SOURCE_DIR}/hiprt/impl/Scene.h
417420
${CMAKE_SOURCE_DIR}/hiprt/impl/TriangleMesh.h
418421
${CMAKE_SOURCE_DIR}/hiprt/impl/Triangle.h
419422
${CMAKE_SOURCE_DIR}/hiprt/impl/BvhBuilderUtil.h
420423
${CMAKE_SOURCE_DIR}/hiprt/impl/SbvhCommon.h
421-
${CMAKE_SOURCE_DIR}/hiprt/impl/ApiNodeList.h
422424
${CMAKE_SOURCE_DIR}/hiprt/impl/BvhConfig.h
423425
${CMAKE_SOURCE_DIR}/hiprt/impl/MemoryArena.h
424426
${CMAKE_SOURCE_DIR}/hiprt/hiprt_types.h
@@ -479,11 +481,16 @@ if ( BAKE_COMPILED_KERNEL )
479481

480482
set(PYTHON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/contrib/Orochi/scripts/convert_binary_to_array.py")
481483

484+
set(ARCHIVE_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/contrib/Orochi/scripts/create_archive.cmake")
485+
482486
# HIPRT binary
483487
set(KERNEL_HIPRT_H "${CMAKE_CURRENT_SOURCE_DIR}/hiprt/impl/bvh_build_array.h")
484488
add_custom_command(
485489
OUTPUT ${KERNEL_HIPRT_H}
486-
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_FILE} ${KERNEL_HIPRT_COMP} ${KERNEL_HIPRT_H}
490+
# 1) Create the Zstd archive
491+
COMMAND ${CMAKE_COMMAND} -DINPUT_FILE=${KERNEL_HIPRT_COMP} -DOUTPUT_FILE=${KERNEL_HIPRT_COMP_COMPRESSED} -DDO_COMPRESS=${COMPILED_COMPRESSION} -P ${ARCHIVE_SCRIPT}
492+
# 2) Run the Python converter on that archive
493+
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_FILE} ${KERNEL_HIPRT_COMP} ${KERNEL_HIPRT_COMP_COMPRESSED} ${KERNEL_HIPRT_H} ${COMPILED_COMPRESSION}
487494
DEPENDS ${KERNEL_HIPRT_COMP} # Ensure compile.py has already run.
488495
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
489496
COMMENT "Converting HIPRT compiled kernel to header"
@@ -494,7 +501,10 @@ if ( BAKE_COMPILED_KERNEL )
494501
set(KERNEL_OROCHI_H "${CMAKE_CURRENT_SOURCE_DIR}/contrib/Orochi/ParallelPrimitives/cache/oro_compiled_kernels.h")
495502
add_custom_command(
496503
OUTPUT ${KERNEL_OROCHI_H}
497-
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_FILE} ${KERNEL_OROCHI_COMP} ${KERNEL_OROCHI_H}
504+
# 1) Create the Zstd archive
505+
COMMAND ${CMAKE_COMMAND} -DINPUT_FILE=${KERNEL_OROCHI_COMP} -DOUTPUT_FILE=${KERNEL_OROCHI_COMP_COMPRESSED} -DDO_COMPRESS=${COMPILED_COMPRESSION} -P ${ARCHIVE_SCRIPT}
506+
# 2) Run the Python converter on that archive
507+
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_FILE} ${KERNEL_OROCHI_COMP} ${KERNEL_OROCHI_COMP_COMPRESSED} ${KERNEL_OROCHI_H} ${COMPILED_COMPRESSION}
498508
DEPENDS ${KERNEL_OROCHI_COMP} # Ensure compile.py has already run.
499509
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
500510
COMMENT "Converting Orochi compiled kernel to header"
@@ -533,8 +543,40 @@ endif()
533543

534544

535545
if ( BAKE_COMPILED_KERNEL )
546+
547+
548+
if ( COMPILED_COMPRESSION )
549+
# Gather minimal Zstd sources
550+
file(GLOB ZSTD_SRCS
551+
contrib/zstd/lib/common/*.c
552+
contrib/zstd/lib/decompress/*.c
553+
)
554+
555+
# Build a static lib zstd_embedded
556+
add_library(zstd_embedded STATIC
557+
${ZSTD_SRCS}
558+
)
559+
560+
# Include Zstd headers
561+
target_include_directories(zstd_embedded
562+
PUBLIC
563+
contrib/zstd/lib
564+
)
565+
566+
set_target_properties(zstd_embedded PROPERTIES POSITION_INDEPENDENT_CODE ON) # -fPIC
567+
target_compile_definitions(zstd_embedded PRIVATE ZSTD_DISABLE_ASM) # disable ASM for easier build
568+
569+
# Link against zstd_embedded
570+
target_link_libraries(${HIPRT_NAME} zstd_embedded )
571+
572+
# the 'ORO_LINK_ZSTD' flag enables use of ZSTD API in the source code.
573+
target_compile_definitions(${HIPRT_NAME} PRIVATE ORO_LINK_ZSTD)
574+
endif()
575+
576+
577+
536578
# enable the 'BAKE_COMPILED_KERNEL' on Orochi: this mode is activated by adding those 2 defines.
537-
target_compile_definitions(${HIPRT_NAME} PRIVATE ORO_PP_LOAD_FROM_STRING ORO_PRECOMPILED)
579+
target_compile_definitions(${HIPRT_NAME} PRIVATE ORO_PP_LOAD_FROM_STRING HIPRT_BITCODE_LINKING ORO_PRECOMPILED)
538580

539581
#enable the 'BAKE_COMPILED_KERNEL' on HIPRT:
540582
target_compile_definitions(${HIPRT_NAME} PRIVATE HIPRT_BAKE_COMPILED_KERNEL )
@@ -592,12 +634,17 @@ if(PRECOMPILE AND NOT BAKE_COMPILED_KERNEL)
592634
DESTINATION bin)
593635
endif()
594636

637+
638+
639+
640+
641+
595642
# Project: Unit Test
596643
if(NOT NO_UNITTEST)
597644

598645
add_executable(unittest)
599646

600-
if(BITCODE)
647+
if(BITCODE OR BAKE_COMPILED_KERNEL)
601648
target_compile_definitions(unittest PRIVATE HIPRT_BITCODE_LINKING)
602649
endif()
603650
if(WIN32)

contrib/Orochi/Orochi/Orochi.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,6 +1219,21 @@ oroError OROAPI oroCtxGetCurrent(oroCtx* pctx)
12191219
if ( e != hipSuccess )
12201220
return hip2oro(e);
12211221
}
1222+
1223+
// externally initialized context
1224+
if( s_oroCtxs.count( ctxt->m_ptr ) == 0 && ctxt->m_ptr )
1225+
{
1226+
ioroCtx_t* c = new ioroCtx_t;
1227+
c->m_ptr = ctxt->m_ptr;
1228+
c->setApi( s_api );
1229+
s_oroCtxs[ctxt->m_ptr] = c;
1230+
}
1231+
1232+
if (s_oroCtxs.count(ctxt->m_ptr) == 0)
1233+
{
1234+
return oroErrorNotReady;
1235+
}
1236+
12221237
( *pctx ) = s_oroCtxs[ctxt->m_ptr];
12231238
delete ctxt;
12241239
return oroSuccess;
@@ -2627,11 +2642,11 @@ oroError_t OROAPI oroModuleLaunchCooperativeKernelMultiDevice(oroFunctionLaunchP
26272642
hipModuleLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags) );
26282643
return oroErrorUnknown;
26292644
}
2630-
oroError_t OROAPI oroModuleLaunchKernel(oroFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, oroStream_t stream, void ** kernelParams, void ** extra)
2645+
oroError_t OROAPI oroModuleLaunchKernel(oroFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, oroStream_t stream, const void* const* kernelParams, const void* const* extra)
26312646
{
26322647
__ORO_FUNC(
26332648
CU4ORO::hipModuleLaunchKernel_cu4oro(__ORO_FORCE_CAST(CU4ORO::hipFunction_t,f), __ORO_FORCE_CAST(unsigned int,gridDimX), __ORO_FORCE_CAST(unsigned int,gridDimY), __ORO_FORCE_CAST(unsigned int,gridDimZ), __ORO_FORCE_CAST(unsigned int,blockDimX), __ORO_FORCE_CAST(unsigned int,blockDimY), __ORO_FORCE_CAST(unsigned int,blockDimZ), __ORO_FORCE_CAST(unsigned int,sharedMemBytes), __ORO_FORCE_CAST(CU4ORO::hipStream_t,stream), __ORO_FORCE_CAST(void **,kernelParams), __ORO_FORCE_CAST(void **,extra)),
2634-
hipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, stream, kernelParams, extra) );
2649+
hipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, stream, __ORO_FORCE_CAST(void**, kernelParams), __ORO_FORCE_CAST(void**, extra)) );
26352650
return oroErrorUnknown;
26362651
}
26372652
oroError_t OROAPI oroModuleLoad(oroModule_t * module, const char * fname)

contrib/Orochi/Orochi/Orochi.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -991,7 +991,7 @@ oroError_t OROAPI oroModuleGetGlobal(oroDeviceptr_t * dptr, size_t * bytes, oroM
991991
oroError_t OROAPI oroModuleGetTexRef(textureReference ** texRef, oroModule_t hmod, const char * name);
992992
oroError_t OROAPI oroModuleLaunchCooperativeKernel(oroFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, oroStream_t stream, void ** kernelParams);
993993
oroError_t OROAPI oroModuleLaunchCooperativeKernelMultiDevice(oroFunctionLaunchParams * launchParamsList, unsigned int numDevices, unsigned int flags);
994-
oroError_t OROAPI oroModuleLaunchKernel(oroFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, oroStream_t stream, void ** kernelParams, void ** extra);
994+
oroError_t OROAPI oroModuleLaunchKernel(oroFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, oroStream_t stream, const void* const* kernelParams, const void* const* extra);
995995
oroError_t OROAPI oroModuleLoad(oroModule_t * module, const char * fname);
996996
oroError_t OROAPI oroModuleLoadData(oroModule_t * module, const void * image);
997997
oroError_t OROAPI oroModuleLoadDataEx(oroModule_t * module, const void * image, unsigned int numOptions, oroJitOption * options, void ** optionValues);

contrib/Orochi/Orochi/OrochiUtils.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@
3636
#include <sys/stat.h>
3737
#endif
3838

39+
#ifdef ORO_LINK_ZSTD
40+
#include <contrib/zstd/lib/zstd.h>
41+
#endif
42+
3943
inline std::wstring utf8_to_wstring( const std::string& str )
4044
{
4145
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> myconv;
@@ -790,3 +794,45 @@ void OrochiUtils::launch2D( oroFunction func, int nx, int ny, const void** args,
790794
OROASSERT( e == oroSuccess, 0 );
791795
}
792796

797+
void OrochiUtils::HandlePrecompiled(std::vector<unsigned char>& out, const CompressedBuffer& buffer)
798+
{
799+
#ifdef ORO_LINK_ZSTD
800+
out.assign(buffer.uncompressedSize,0);
801+
802+
size_t decompressedSize = ZSTD_decompress(
803+
out.data(), // final uncompressed buffer
804+
out.size(), // final size
805+
buffer.data, // compressed buffer
806+
buffer.size // compressed buffer - size
807+
);
808+
809+
if ( decompressedSize != buffer.uncompressedSize )
810+
throw std::runtime_error( "ERROR: ZSTD_decompress FAILED." );
811+
#else
812+
throw std::runtime_error( "ERROR: ZSTD is not part of this build." );
813+
#endif
814+
return;
815+
}
816+
817+
818+
void OrochiUtils::HandlePrecompiled(std::vector<unsigned char>& out, const RawBuffer& buffer)
819+
{
820+
out = std::vector<unsigned char>(buffer.data, buffer.data + buffer.size );
821+
return;
822+
}
823+
824+
825+
void OrochiUtils::HandlePrecompiled(std::vector<unsigned char>& out, const unsigned char* rawData, size_t rawData_sizeByte, std::optional<size_t> uncompressed_sizeByte)
826+
{
827+
if (uncompressed_sizeByte.has_value()) {
828+
// if the input buffer is compressed :
829+
CompressedBuffer buffer{ rawData, rawData_sizeByte, uncompressed_sizeByte.value() };
830+
HandlePrecompiled(out, buffer );
831+
} else {
832+
// if the input buffer is not compressed
833+
RawBuffer buffer{ rawData, rawData_sizeByte };
834+
HandlePrecompiled(out, buffer );
835+
}
836+
}
837+
838+

contrib/Orochi/Orochi/OrochiUtils.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <filesystem>
2828
#include <unordered_map>
2929
#include <vector>
30+
#include <optional>
3031

3132
#if defined( GNUC )
3233
#include <signal.h>
@@ -83,6 +84,20 @@ class OrochiUtils
8384
static void getModule( oroDevice device, const char* code, const char* path, std::vector<const char*>* optsIn, const char* funcName, oroModule* moduleOut );
8485
static void launch1D( oroFunction func, int nx, const void** args, int wgSize = 64, unsigned int sharedMemBytes = 0, oroStream stream = 0 );
8586
static void launch2D( oroFunction func, int nx, int ny, const void** args, int wgSizeX = 8, int wgSizeY = 8, unsigned int sharedMemBytes = 0, oroStream stream = 0 );
87+
88+
89+
struct CompressedBuffer {
90+
const unsigned char* data = nullptr; // compressed data
91+
size_t size = 0; // size in bytes of 'data'
92+
size_t uncompressedSize = 0; // size in bytes of the uncompressed data.
93+
};
94+
struct RawBuffer {
95+
const unsigned char* data = nullptr;
96+
size_t size = 0;
97+
};
98+
static void HandlePrecompiled(std::vector<unsigned char>& out, const CompressedBuffer& buffer);
99+
static void HandlePrecompiled(std::vector<unsigned char>& out, const RawBuffer& buffer);
100+
static void HandlePrecompiled(std::vector<unsigned char>& out, const unsigned char* rawData, size_t rawData_sizeByte, std::optional<size_t> uncompressed_sizeByte=std::nullopt);
86101

87102
template<typename T>
88103
static void malloc( T*& ptr, size_t n )

0 commit comments

Comments
 (0)