diff --git a/CMakeLists.txt b/CMakeLists.txt
index fb0f43972..7e137c247 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 3.12)
 #CONDITIONAL FLAG(to run simulation on CPU or GPU)
 #
 #For GPU:
-#set(ENABLE_CUDA YES)
+set(ENABLE_CUDA YES)
 #
 #For CPU:
 #set(ENABLE_CUDA NO)
@@ -31,12 +31,13 @@ endif()
 # to your desired architecture: \
 #
 #cmake -D ENABLE_CUDA=YES -D TARGET_ARCH=70 ..
-# 
+#
 #"YES" / GPU choice only available if CUDA library is installed and the GPU is CUDA capable.
-#If no TARGET_ARCH is passed in then it will default to 37 which is the kepler architecture
+#If TARGET_ARCH is not user-specified then it will default to native.
 ############################################################################################
 if(NOT DEFINED TARGET_ARCH)
-    set(TARGET_ARCH 37)
+# If target not specified by user, use local machine's GPU architecture
+    set(TARGET_ARCH "native")
 endif()
 
 #CONDITIONAL FLAG to turn on the validation mode
@@ -50,14 +51,32 @@ if(ENABLE_CUDA)
     message("\n----Generating Makefile for Graphitti GPU version----")
     project(Graphitti LANGUAGES CXX CUDA C)
     #Verify CUDA package is present
-    find_Package(CUDA REQUIRED)
+    find_package(CUDA REQUIRED)
     #Set the USE_GPU preprocessor macro so that GPU code will be compiled.
     add_compile_definitions(USE_GPU)
-#Specify the CUDA architecture / gencode that will be targeted
-    ### Set gencode and architecture variables to the correct values for your specific NVIDIA hardware
-    set(CMAKE_CUDA_ARCHITECTURES ${TARGET_ARCH})
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode=arch=compute_${TARGET_ARCH},code=sm_${TARGET_ARCH})
+
+# Specify the CUDA architecture / gencode that will be targeted.
+# Set TARGET_ARCH to the compute capability of your NVIDIA hardware (e.g. 70),
+# or leave it as "native" to build for the GPU(s) found in this machine.
+#   sm_${TARGET_ARCH}      = real architecture (device binary)
+#   compute_${TARGET_ARCH} = virtual architecture (embeds PTX, i.e. Parallel
+#                            Thread Execution code, which the CUDA driver can
+#                            compile into a working binary on a newer GPU)
+# "native" is not a compute capability, so it cannot be spliced into the
+# compute_XX / sm_XX names; it has to be passed to nvcc as -arch=native.
+    if("${TARGET_ARCH}" STREQUAL "native")
+        # CMAKE_CUDA_ARCHITECTURES only accepts "native" from CMake 3.24 on.
+        if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.24)
+            set(CMAKE_CUDA_ARCHITECTURES native)
+        endif()
+        list(APPEND CUDA_NVCC_FLAGS "-arch=native")
+    else()
+        set(CMAKE_CUDA_ARCHITECTURES ${TARGET_ARCH})
+        list(APPEND CUDA_NVCC_FLAGS
+            "-gencode=arch=compute_${TARGET_ARCH},code=[sm_${TARGET_ARCH},compute_${TARGET_ARCH}]")
+    endif()
     message(STATUS "Using CUDA architecture: ${TARGET_ARCH}")
+
 else()
     message("\n----Generating Makefile for Graphitti CPU version----")
 
diff --git a/docs/Developer/GPUArchLevels.md b/docs/Developer/GPUArchLevels.md
new file mode 100644
index 000000000..abfbd5f7b
--- /dev/null
+++ b/docs/Developer/GPUArchLevels.md
@@ -0,0 +1,41 @@
+# GPU Architecture Levels
+Originally, Graphitti targeted CUDA Compute Capability (CC) 3.5. This later changed to a flexible architecture model, so Graphitti can now be compiled for modern hardware (such as Volta or Ada Lovelace). This allows the simulator to leverage modern GPU features while maintaining backwards compatibility for older cards by using conditional compilation.
+
+# Supported Architectures
+
+| Architecture | Compute Capability | Project Build Compatibility |
+| :--- | :--- | :--- |
+| **Kepler** | 3.5 / 3.7 | **Baseline**: Minimum version for backwards compatibility. |
+| **Volta** | 7.0 | **Target**: Primary architecture for high-performance server runs. |
+| **Ampere** | 8.0 / 8.6 | **Development**: Common for modern local development. |
+| **Ada Lovelace** | 8.9 | **Current**: Latest generation available in the lab. |
+
+## Compute Capability
+- Binary Compatibility (cubin): A device binary built for compute capability X.y runs only on GPUs of capability X.z with z >= y, i.e. within the same major version. A binary for raiju (3.7) therefore does not run natively on ghidorah (8.9), nor vice versa.
+- Backwards Compatibility: Code built for an older architecture runs on a newer GPU only when PTX is embedded, because the CUDA driver can compile that PTX for the newer GPU at load time.
+
+- Forwards Compatibility (PTX): PTX generated for a specific virtual architecture requires a GPU of that compute capability or newer to run; it cannot run on older GPUs.
+  - Parallel Thread Execution (PTX): PTX is included in lab builds.
+
+# Specifying Target Architecture
+
+By default, if not user-specified, `TARGET_ARCH` is set to `"native"`,
+which auto-detects the GPU(s) in the build machine and compiles for them.
+
+Legacy Support: `-DTARGET_ARCH=35`
+
+Otachi Server: `-DTARGET_ARCH=70`
+
+# Performance Notes
+## Lab Servers Reference Table
+Information retrieved by running `nvidia-smi` in a terminal.
+
+| Lab Server | GPU Model | Architecture | Compute Capability | Recommended `TARGET_ARCH` |
+| :--- | :--- | :--- | :--- | :--- |
+| **raiju** | Tesla K80 | **Kepler** | 3.7 | `37` |
+| **otachi** | Tesla V100-PCIE-16GB | **Volta** | 7.0 | `70` |
+| **ghidorah** | RTX 4500 Ada Generation | **Ada Lovelace** | 8.9 | `89` |
+
+Note: Per project guidelines, conditional compilation adds structural complexity (code cruft). We only implement architecture-specific paths if they produce a measurable benefit.
+
+- Example: Run a 5-10 minute simulation, measure performance with `nvidia-smi`, and compare it to baseline results.