Skip to content
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
85d6cf5
add spinlock detection tool
William-An Sep 15, 2025
9a23e6b
use dprintf for debug msg
William-An Sep 16, 2025
6387ede
integrate spinlock fastforwarding with accel-sim tracer
William-An Sep 16, 2025
c7e67f4
add custom rundir support for spinlock tool
William-An Sep 16, 2025
30e9c19
add spinlock detection script to the run_hw_trace.py
William-An Sep 16, 2025
7d30c2a
Automated Format
purdue-jenkins Sep 16, 2025
48a09f1
track kernel histogram for every launch in every context by kernel name
William-An Sep 22, 2025
b08340e
update tracer tool with per-kernel histogram
William-An Sep 22, 2025
2c68dee
Merge branch 'spinlock_fix' of github.com:purdue-aalp/accel-sim-frame…
William-An Sep 22, 2025
ea101fb
format to pass ci
William-An Sep 22, 2025
85d1b87
update test app
William-An Sep 22, 2025
08a6b5b
Merge branch 'dev' into spinlock_fix
William-An Sep 22, 2025
f2bcdfa
move test app to gpu-app-collection
William-An Sep 23, 2025
79f819f
update script for spinlock handling
William-An Sep 23, 2025
d21015b
update ci to include spinlock tracer run
William-An Sep 23, 2025
b577da3
add spinlock test app to accel-sim yaml
William-An Sep 23, 2025
186e1bf
fix a bug when detecting spinlock
William-An Sep 23, 2025
cad11ca
fix bug
William-An Sep 23, 2025
1f5a2ed
fix filename too long issue and clean intermediate files by default
William-An Sep 24, 2025
20e5f99
fix path issue
William-An Sep 24, 2025
fae2d48
fix histogram path for merged histo and add readme
William-An Sep 25, 2025
a10a587
address PR review and update top-level readme
William-An Sep 25, 2025
58d3a63
update CI for PR
William-An Oct 6, 2025
007d1a8
build spinlock
William-An Oct 6, 2025
5595b4a
clone recursively for building GPU ubench
William-An Oct 7, 2025
82a48a2
remove sim compare for spinlock since it takes too long to complete
William-An Oct 7, 2025
3257e47
move spinlock tracer test to weekly and fix a bug in it
William-An Oct 10, 2025
db3ed3f
Merge branch 'dev' into spinlock_fix
JRPan Oct 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ jobs:
source ./env-setup/12.8_env_setup.sh
source ./gpu-app-collection/src/setup_environment
rm -rf ./hw_run/
./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft -D 7
./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft -D 7 --spinlock_handling none
- name: generate-rodinia_2.0-ft-hw_stats
run: |
source ./env-setup/12.8_env_setup.sh
Expand Down
20 changes: 19 additions & 1 deletion .github/workflows/weekly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
source ./env-setup/12.8_env_setup.sh
export PATH=/home/tgrogers-raid/a/common/python2:$PATH
rm -rf ./gpu-app-collection/
git clone [email protected]:accel-sim/gpu-app-collection.git
git clone --recursive [email protected]:accel-sim/gpu-app-collection.git
source ./gpu-app-collection/src/setup_environment
ln -s /home/tgrogers-raid/a/common/data_dirs ./gpu-app-collection/
make -j8 -C ./gpu-app-collection/src rodinia-3.1
Expand All @@ -53,6 +53,24 @@ jobs:
ln -s /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces ./hw_run
./util/tracer_nvbit/run_hw_trace.py -B rodinia-3.1,GPU_Microbenchmark -D 7
# ./util/tracer_nvbit/run_hw_trace.py -B rodinia-3.1,GPU_Microbenchmark,parboil,polybench,cutlass_5_trace,Deepbench_nvidia_tencore,Deepbench_nvidia_normal -D 7
- name: generate-spinlock-traces-spinlock_handling
run: |
source ./env-setup/12.8_env_setup.sh
source ./gpu-app-collection/src/setup_environment
rm -rf ./hw_run/
./util/tracer_nvbit/run_hw_trace.py -B Spinlock -D 7 --spinlock_handling fast_forward
mv ./hw_run ./hw_run_fast_forward
./util/tracer_nvbit/run_hw_trace.py -B Spinlock -D 7 --spinlock_handling none
mv ./hw_run ./hw_run_none
- name: test-new-traces-spinlock_handling
# Test only fast-forwarded traces as the none one takes too long to run (~2-3 hr)
run: |
source ./env-setup/12.8_env_setup.sh
source ./gpu-simulator/setup_environment.sh
./util/job_launching/run_simulations.py -B Spinlock -C QV100-SASS -T ./hw_run_fast_forward/traces/device-7/ -N spinlock-microbenchmark-$$-fast_forward
./util/job_launching/monitor_func_test.py -I -v -s spinlock-stats-per-app.csv -N spinlock-microbenchmark-$$-fast_forward
# ./util/job_launching/run_simulations.py -B Spinlock -C QV100-SASS -T ./hw_run_none/traces/device-7/ -N spinlock-microbenchmark-$$-none
# ./util/job_launching/monitor_func_test.py -I -v -s spinlock-stats-per-app.csv -N spinlock-microbenchmark-$$-none
SASS-Weekly:
needs: [Tracer-Weekly]
if: github.repository == 'accel-sim/accel-sim-framework'
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,6 @@ gpu-simulator/gpgpu-sim
extern
gpu-simulator/accel_sim.pyi
compile_commands.json
.cache
.cache
.cursorrules
CLAUDE.md
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- [Accel-Sim Components](#accel-sim-components)
- [Accel-Sim Tracer](#accel-sim-tracer)
- [A simple example](#a-simple-example)
- [Spinlock handling](#spinlock-handling)
- [Pre-traced applications](#pre-traced-applications)
- [Accel-Sim SASS Frontend and Simulation Engine](#accel-sim-sass-frontend-and-simulation-engine)
- [Accel-Sim Correlator](#accel-sim-correlator)
Expand Down Expand Up @@ -113,6 +114,18 @@ That's it. The traces for the short-running rodinia tests will be generated in:

To extend the tracer, use other apps and understand what, exactly is going on, read [this](https://github.com/accel-sim/accel-sim-framework/blob/dev/util/tracer_nvbit/README.md).

#### Spinlock handling

If your application contains spinlock instructions, you can handle them with the tracer by using the following command:

```bash
./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft -D <gpu-device-num-to-run-on> --spinlock_handling fast_forward
```

This fast-forwards the spinlock instructions, keeping them only for the number of iterations specified by the `--spinlock_fast_forward_iterations` option.

The tool for spinlock detection is in `./util/tracer_nvbit/others/spinlock_tool/`.

#### Pre-traced applications
For convenience, we have included a repository of pre-traced applications - to get all those traces, simply run:
```bash
Expand Down
8 changes: 8 additions & 0 deletions util/job_launching/apps/define-all-apps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,14 @@ GPU_Atomic:
- args: 16
accel-sim-mem: 1G

Spinlock:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a dedicated "suite" instead of part of uBench? Then can we only run fast forward on this one?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I saw that on gpu-app-collection Spinlock is part of the uBench. Can you move this under ubench suite?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can, but the correlation of it will be terrible with just fast-forwarding when running with the ubench suite, given that this app is acquiring a highly contested lock.

I gave it a dedicated suite as all the atomic kernels also got a dedicated suite, even though they are under the ubench folder in the gpu-app-collection.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay valid point

exec_dir: "$GPUAPPS_ROOT/bin/$CUDA_VERSION/release/"
data_dirs: "$GPUAPPS_ROOT/data_dirs/"
execs:
- spinlock_simple:
- args:
accel-sim-mem: 1G

Atomic_Profile:
exec_dir: "$GPUAPPS_ROOT/bin/$CUDA_VERSION/release/"
data_dirs: "$GPUAPPS_ROOT/data_dirs/"
Expand Down
7 changes: 2 additions & 5 deletions util/tracer_nvbit/.gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
nvbit_release/
silicon_checkpoint_tool/checkpoint/checkpoint.o
silicon_checkpoint_tool/checkpoint/checkpoint.so
tracer_tool/tracer_tool.o
tracer_tool/tracer_tool.so
tracer_tool/inject_funcs.o
*.o
*.so
tracer_tool/traces-processing/post-traces-processing
11 changes: 6 additions & 5 deletions util/tracer_nvbit/Makefile
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@

# Top-level driver for the NVBit-based tools in this directory.
# Sub-makes are invoked through $(MAKE) rather than a literal `make` so that
# -j (jobserver) and -n are propagated to them.
# `all` and `clean` are commands, not files, so declare them phony; otherwise
# a file with one of these names would silently disable the target.
.PHONY: all clean

all:
	$(MAKE) -C tracer_tool
	$(MAKE) -C tracer_tool/traces-processing
	$(MAKE) -C others/spinlock_tool
# The silicon checkpoint tool is currently not built:
#$(MAKE) -C silicon_checkpoint_tool

# Clean every sub-directory that `all` builds, including the spinlock tool.
clean:
	$(MAKE) clean -C tracer_tool
	$(MAKE) clean -C tracer_tool/traces-processing
	$(MAKE) clean -C others/spinlock_tool
79 changes: 79 additions & 0 deletions util/tracer_nvbit/others/spinlock_tool/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# nvcc driver command; the host compiler is passed through from $(CXX).
NVCC=nvcc -ccbin=$(CXX) -D_FORCE_INLINES
PTXAS=ptxas

# Minimum nvcc release accepted for building this tool.
NVCC_VER_REQ=10.1
# Simply-expanded (:=) so each $(shell ...) probe is run exactly once, at
# parse time, instead of on every expansion of the variable.
# NVCC_VER is the "X.Y" that follows "release" in `nvcc --version`;
# NVCC_VER_CHECK is what bc prints for the comparison (1 = ok, 0 = too old).
NVCC_VER:=$(shell $(NVCC) --version | grep release | cut -f2 -d, | cut -f3 -d' ')
NVCC_VER_CHECK:=$(shell echo "$(NVCC_VER) >= $(NVCC_VER_REQ)" | bc)

# Only an explicit 0 aborts the build. If the probe produced nothing
# (NVCC_VER_CHECK is empty), this check does not fire.
ifeq ($(NVCC_VER_CHECK),0)
$(error ERROR: nvcc version >= $(NVCC_VER_REQ) required to compile an nvbit tool! Instrumented applications can still use lower versions of nvcc.)
endif

# ptxas release from which the explicit register cap below is no longer passed.
PTXAS_VER_ADD_FLAG=12.3
# Simply-expanded (:=) so each $(shell ...) probe is run exactly once, at
# parse time. PTXAS_VER is the "X.Y" that follows "release" in
# `ptxas --version`; PTXAS_VER_CHECK is bc's verdict (1 = at least
# PTXAS_VER_ADD_FLAG, 0 = older).
PTXAS_VER:=$(shell $(PTXAS) --version | grep release | cut -f2 -d, | cut -f3 -d' ')
PTXAS_VER_CHECK:=$(shell echo "$(PTXAS_VER) >= $(PTXAS_VER_ADD_FLAG)" | bc)

# Older ptxas: cap registers when compiling inject_funcs.cu.
# Newer ptxas, or an empty probe result: pass no extra flag.
ifeq ($(PTXAS_VER_CHECK), 0)
MAXRREGCOUNT_FLAG=-maxrregcount=24
else
MAXRREGCOUNT_FLAG=
endif

# NVBit core directory (headers and libnvbit.a), relative to this Makefile's
# directory.
NVBIT_PATH=../../nvbit_release/core
INCLUDES=-I$(NVBIT_PATH)

LIBS=-L$(NVBIT_PATH) -lnvbit
# Library directory derived from the nvcc found on PATH
# (<prefix>/bin/nvcc -> <prefix>/lib64). Deliberately left recursive (=): it is
# only referenced by the link recipe, so `which` runs only when linking.
NVCC_PATH=-L $(subst bin/nvcc,lib64,$(shell which nvcc | tr -s /))

# Simply-expanded (:=) so the directory is globbed once rather than on every
# reference to SOURCES/OBJECTS.
SOURCES:=$(wildcard *.cu)

OBJECTS:=$(SOURCES:.cu=.o)
# Target GPU architecture passed to nvcc via -arch; can be overridden by the caller.
ARCH?=all

# The tool's shared object is named after the directory holding this Makefile.
mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST)))
current_dir := $(notdir $(patsubst %/,%,$(dir $(mkfile_path))))

NVBIT_TOOL=$(current_dir).so

# `all` and `clean` name commands, not files; declaring them phony keeps them
# working even if a file called `all` or `clean` appears in this directory.
.PHONY: all clean

all: $(NVBIT_TOOL)

# Link all objects and the NVBit static library into the injectable shared object.
$(NVBIT_TOOL): $(OBJECTS) $(NVBIT_PATH)/libnvbit.a
	$(NVCC) -arch=$(ARCH) -O3 $(OBJECTS) $(LIBS) $(NVCC_PATH) -lcuda -lcudart_static -shared -o $@

# Generic rule for tool sources: relocatable device code, position-independent
# host code.
%.o: %.cu
	$(NVCC) -dc -c -std=c++17 $(INCLUDES) -Xptxas -cloning=no -Xcompiler -Wall -arch=$(ARCH) -O3 -Xcompiler -fPIC $< -o $@

# Explicit rule that takes precedence over the pattern rule for
# inject_funcs.o: it is built with different ptxas options and, depending on
# the ptxas version, a register cap.
inject_funcs.o: inject_funcs.cu
	$(NVCC) $(INCLUDES) $(MAXRREGCOUNT_FLAG) -Xptxas -astoolspatch --keep-device-functions -arch=$(ARCH) -Xcompiler -Wall -Xcompiler -fPIC -c $< -o $@

# Remove the build products of this directory ($(RM) defaults to `rm -f`).
clean:
	$(RM) *.so *.o
20 changes: 20 additions & 0 deletions util/tracer_nvbit/others/spinlock_tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Spinlock tool

## Description

This tool is used to detect spinlocks in the kernel code.

## Usage

```bash
# Run program first time to get the instruction histogram of the program's kernels
SPINLOCK_PHASE=0 CUDA_INJECTION64_PATH=PATH/TO/spinlock_tool.so program

# Run program second time to get another instruction histogram of the program's kernels
# When NVBit exits, this tool writes the file spinlock_detection/spinlock_instructions.txt,
# which contains the instruction indices of the spinlock instructions in the program's kernels
SPINLOCK_PHASE=1 CUDA_INJECTION64_PATH=PATH/TO/spinlock_tool.so program

# To fast forward the spinlock instructions with accel-sim tracer, you can use the following command
ENABLE_SPINLOCK_FAST_FORWARD=1 CUDA_INJECTION64_PATH=PATH/TO/tracer_tool.so program
```
Loading