accel-sim · William-An · Oct 10, 2025 · Sep 15, 2025 · Sep 16, 2025 · Sep 16, 2025
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -244,6 +244,9 @@ jobs:
   Tracer-Tool:
     needs: [check-format]
     runs-on: tgrogers-gpu01
+    strategy:
+      matrix:
+        spinlock_handling: ["none", "fast_forward"]
     defaults:
       run:
         shell: bash
@@ -291,7 +294,7 @@ jobs:
           source ./env-setup/12.8_env_setup.sh
           source ./gpu-app-collection/src/setup_environment
           rm -rf ./hw_run/
-          ./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft -D 7
+          ./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft -D 7 --spinlock_handling ${{ matrix.spinlock_handling }}
       - name: generate-rodinia_2.0-ft-hw_stats
         run: |
           source ./env-setup/12.8_env_setup.sh

diff --git a/util/job_launching/apps/define-all-apps.yml b/util/job_launching/apps/define-all-apps.yml
@@ -126,6 +126,14 @@ GPU_Atomic:
             - args: 16
               accel-sim-mem: 1G
 
+# Spinlock suite: one executable (spinlock_simple) with a single run
+# configuration whose args entry is left empty and whose accel-sim-mem is 1G.
+Spinlock:
+    exec_dir: "$GPUAPPS_ROOT/bin/$CUDA_VERSION/release/"
+    data_dirs: "$GPUAPPS_ROOT/data_dirs/"
+    execs:
+        - spinlock_simple:
+            - args: 
+              accel-sim-mem: 1G
+
 Atomic_Profile:
     exec_dir: "$GPUAPPS_ROOT/bin/$CUDA_VERSION/release/"
     data_dirs: "$GPUAPPS_ROOT/data_dirs/"

diff --git a/util/tracer_nvbit/.gitignore b/util/tracer_nvbit/.gitignore
@@ -1,7 +1,4 @@
 nvbit_release/
-silicon_checkpoint_tool/checkpoint/checkpoint.o
-silicon_checkpoint_tool/checkpoint/checkpoint.so
-tracer_tool/tracer_tool.o
-tracer_tool/tracer_tool.so
-tracer_tool/inject_funcs.o
+*.o
+*.so
 tracer_tool/traces-processing/post-traces-processing
diff --git a/util/tracer_nvbit/Makefile b/util/tracer_nvbit/Makefile
@@ -1,9 +1,10 @@
 
+# Default target: build each tool by recursing into its directory.
+# The sub-makes are started through $(MAKE) rather than a literal `make`, so
+# command-line flags and the jobserver are passed down to them.
 all:
-	make -C tracer_tool
-	make -C tracer_tool/traces-processing
-	#make -C silicon_checkpoint_tool
+	$(MAKE) -C tracer_tool
+	$(MAKE) -C tracer_tool/traces-processing
+	$(MAKE) -C others/spinlock_tool
+# The silicon_checkpoint_tool build is disabled (left commented out):
+#$(MAKE) -C silicon_checkpoint_tool
 
 clean:
-	make clean -C tracer_tool
-	make clean -C tracer_tool/traces-processing
+	$(MAKE) clean -C tracer_tool
+	$(MAKE) clean -C tracer_tool/traces-processing
diff --git a/util/tracer_nvbit/others/spinlock_tool/Makefile b/util/tracer_nvbit/others/spinlock_tool/Makefile
@@ -0,0 +1,79 @@
+# SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Toolchain commands. NVCC stays recursively expanded (`=`) so it picks up
+# whatever $(CXX) is in effect at the point of use.
+NVCC=nvcc -ccbin=$(CXX) -D_FORCE_INLINES
+PTXAS=ptxas
+
+# Minimum nvcc release accepted for building this tool.
+NVCC_VER_REQ=10.1
+# Shell probes are assigned with `:=` so each pipeline runs exactly once, at
+# parse time, instead of on every expansion of the variable.
+NVCC_VER:=$(shell $(NVCC) --version | grep release | cut -f2 -d, | cut -f3 -d' ')
+# bc prints 1 when the comparison holds and 0 when it does not.
+# NOTE(review): bc compares the versions as decimal numbers, so a release
+# such as "12.10" would compare lower than "12.3".
+NVCC_VER_CHECK:=$(shell echo "$(NVCC_VER) >= $(NVCC_VER_REQ)" | bc)
+
+# Only an explicit "0" aborts the build; if the probe printed nothing (for
+# example because nvcc or bc is not installed) the check is not enforced,
+# which also keeps `make clean` usable without the toolchain.
+ifeq ($(NVCC_VER_CHECK),0)
+$(error ERROR: nvcc version >= $(NVCC_VER_REQ) required to compile an nvbit tool! Instrumented applications can still use lower versions of nvcc.)
+endif
+
+# ptxas releases below this version get an explicit -maxrregcount when
+# compiling inject_funcs.cu; newer releases get no extra flag.
+PTXAS_VER_ADD_FLAG=12.3
+PTXAS_VER:=$(shell $(PTXAS) --version | grep release | cut -f2 -d, | cut -f3 -d' ')
+PTXAS_VER_CHECK:=$(shell echo "$(PTXAS_VER) >= $(PTXAS_VER_ADD_FLAG)" | bc)
+
+ifeq ($(PTXAS_VER_CHECK), 0)
+MAXRREGCOUNT_FLAG=-maxrregcount=24
+else
+MAXRREGCOUNT_FLAG=
+endif
+
+# Location of the NVBit core (headers and libnvbit.a), relative to this
+# directory.
+NVBIT_PATH=../../nvbit_release/core
+INCLUDES=-I$(NVBIT_PATH)
+
+LIBS=-L$(NVBIT_PATH) -lnvbit
+# Library search path derived from the nvcc found on PATH by rewriting
+# ".../bin/nvcc" to ".../lib64". Evaluated once (`:=`) rather than on every
+# expansion.
+NVCC_PATH:=-L $(subst bin/nvcc,lib64,$(shell which nvcc | tr -s /))
+
+# Every .cu file in this directory is compiled into the tool; the lists are
+# computed once at parse time.
+SOURCES:=$(wildcard *.cu)
+OBJECTS:=$(SOURCES:.cu=.o)
+# Target GPU architecture passed to nvcc via -arch; may be overridden from the
+# command line or the environment.
+ARCH?=all
+
+# The shared library is named after the directory holding this Makefile.
+mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST)))
+current_dir := $(notdir $(patsubst %/,%,$(dir $(mkfile_path))))
+
+NVBIT_TOOL=$(current_dir).so
+
+# `all` and `clean` name actions, not files.
+.PHONY: all clean
+
+# Delete a partially written target when its recipe fails, so a broken
+# object or library is never mistaken for an up-to-date one.
+.DELETE_ON_ERROR:
+
+# Headers that live next to the sources (e.g. common.h). Objects list them as
+# prerequisites so that editing a header triggers a rebuild.
+HEADERS:=$(wildcard *.h)
+
+all: $(NVBIT_TOOL)
+
+# Link all objects and the static NVBit library into the injectable .so.
+$(NVBIT_TOOL): $(OBJECTS) $(NVBIT_PATH)/libnvbit.a
+	$(NVCC) -arch=$(ARCH) -O3 $(OBJECTS) $(LIBS) $(NVCC_PATH) -lcuda -lcudart_static -shared -o $@
+
+# Generic rule for the tool's sources; $< is still the .cu file because it is
+# the first prerequisite.
+%.o: %.cu $(HEADERS)
+	$(NVCC) -dc -c -std=c++17 $(INCLUDES) -Xptxas -cloning=no -Xcompiler -Wall -arch=$(ARCH) -O3 -Xcompiler -fPIC $< -o $@
+
+# inject_funcs.cu is built with its own flags; this explicit rule takes
+# precedence over the pattern rule above.
+inject_funcs.o: inject_funcs.cu $(HEADERS)
+	$(NVCC) $(INCLUDES) $(MAXRREGCOUNT_FLAG) -Xptxas -astoolspatch --keep-device-functions -arch=$(ARCH) -Xcompiler -Wall -Xcompiler -fPIC -c $< -o $@
+
+clean:
+	$(RM) *.so *.o
diff --git a/util/tracer_nvbit/others/spinlock_tool/README.md b/util/tracer_nvbit/others/spinlock_tool/README.md
@@ -0,0 +1,20 @@
+# Spinlock tool
+
+## Description
+
+This tool detects spinlocks in a program's kernels by comparing per-instruction execution counts collected over two runs of the program.
+
+## Usage
+
+```bash
+# Run the program a first time to collect the instruction histogram of its kernels
+SPINLOCK_PHASE=0 CUDA_INJECTION64_PATH=PATH/TO/spinlock_tool.so program
+
+# Run the program a second time to collect another instruction histogram of its kernels.
+# When this run finishes, the tool writes spinlock_detection/spinlock_instructions.txt,
+# which lists the instruction indices of the spinlock instructions in the program's kernels.
+SPINLOCK_PHASE=1 CUDA_INJECTION64_PATH=PATH/TO/spinlock_tool.so program
+
+# To fast forward the spinlock instructions with accel-sim tracer, you can use the following command
+ENABLE_SPINLOCK_FAST_FORWARD=1 CUDA_INJECTION64_PATH=PATH/TO/tracer_tool.so program
+```
diff --git a/util/tracer_nvbit/others/spinlock_tool/common.h b/util/tracer_nvbit/others/spinlock_tool/common.h
@@ -0,0 +1,227 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES.
+ * All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <map>
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <iostream>
+#include <regex>
+
+/* Record filled in by the instrumentation function and sent over the
+ * channel from the GPU to the CPU: an instruction index paired with a
+ * count for that instruction. */
+struct instr_count_t {
+    uint32_t instr_idx;
+    uint32_t count;
+};
+
+
+/*
+ * Holds the instruction histogram of one kernel: a map from instruction
+ * index to a 32-bit execution count, together with the kernel's id and name.
+ * Histograms can be accumulated, merged, compared against another run and
+ * round-tripped through a simple text format.
+ */
+class KernelInstructionHistogram {
+public:
+    /* Creates a placeholder histogram (id 0, name "dummy") with no entries. */
+    KernelInstructionHistogram()
+        : id(0), name("dummy") {
+    }
+
+    /* Creates an empty histogram for the kernel with the given id and name. */
+    KernelInstructionHistogram(uint32_t id, std::string name)
+        : id(id), name(name) {
+    }
+
+    /*
+     * Adds `count` to the entry for `instr_idx`, creating the entry when it
+     * does not exist yet. The stored counter is 32 bits wide, so the result
+     * is reduced modulo 2^32.
+     */
+    void add(uint32_t instr_idx, uint64_t count) {
+        // operator[] value-initializes a missing entry to 0 before the add.
+        uint32_t& slot = histogram[instr_idx];
+        slot = static_cast<uint32_t>(slot + count);
+    }
+
+    /*
+     * Accumulates every entry of `other` into this histogram.
+     * With `use_hash` set, each affected entry is kept reduced modulo
+     * `hash_prime`; otherwise the counts are simply added.
+     */
+    void merge(const KernelInstructionHistogram& other, bool use_hash = false) {
+        for (const auto& [instr_idx, count] : other.histogram) {
+            if (use_hash) {
+                // Do the addition in 64 bits so that an existing entry close
+                // to 2^32 cannot wrap around before the modulo is applied.
+                uint32_t& slot = histogram[instr_idx];
+                const uint64_t sum = static_cast<uint64_t>(slot) + count;
+                slot = static_cast<uint32_t>(sum % hash_prime);
+            } else {
+                add(instr_idx, count);
+            }
+        }
+    }
+
+    /* Re-targets this object at another kernel and drops all entries. */
+    void reinit(uint32_t id, std::string name) {
+        this->id = id;
+        this->name = name;
+        histogram.clear();
+    }
+
+    /*
+     * Compares this histogram with `other` (a second run of the same kernel)
+     * and returns every instruction whose count differs, mapped to the pair
+     * {count in this histogram, count in other}. An instruction that is
+     * missing from one side is reported with a count of 0 for that side.
+     * Instructions with differing counts are likely part of a spinlock.
+     */
+    std::map<uint32_t, std::pair<uint32_t, uint32_t>> findSpinlock(const KernelInstructionHistogram& other) const {
+        std::map<uint32_t, std::pair<uint32_t, uint32_t>> spinlockInstructions;
+
+        // Instructions present in this histogram.
+        for (const auto& [instrIdx, count] : histogram) {
+            const auto otherIt = other.histogram.find(instrIdx);
+            if (otherIt == other.histogram.end()) {
+                // Seen only in this run.
+                spinlockInstructions[instrIdx] = {count, 0};
+            } else if (count != otherIt->second) {
+                // Seen in both runs with different execution counts.
+                spinlockInstructions[instrIdx] = {count, otherIt->second};
+            }
+        }
+
+        // Instructions seen only in the other run.
+        for (const auto& [instrIdx, count] : other.histogram) {
+            if (histogram.find(instrIdx) == histogram.end()) {
+                spinlockInstructions[instrIdx] = {0, count};
+            }
+        }
+
+        return spinlockInstructions;
+    }
+
+    /*
+     * Writes the serialized histogram to `filename`.
+     * Returns false when the file cannot be opened or the write/flush fails.
+     */
+    bool saveToFile(const std::string& filename) const {
+        std::ofstream file(filename);
+        if (!file.is_open()) {
+            return false;
+        }
+        file << serialize();
+        // close() flushes; a failed write or flush leaves the stream failed.
+        file.close();
+        return !file.fail();
+    }
+
+    /*
+     * Replaces the contents of this histogram with the data in `filename`.
+     * Returns false only when the file cannot be opened; malformed content
+     * is handled as described for deserialize().
+     */
+    bool loadFromFile(const std::string& filename) {
+        std::ifstream file(filename);
+        if (!file.is_open()) {
+            return false;
+        }
+
+        std::stringstream buffer;
+        buffer << file.rdbuf();
+        file.close();
+
+        deserialize(buffer.str());
+        return true;
+    }
+
+    /* Returns the sum of all counts, accumulated in 64 bits. */
+    uint64_t getTotalInstructionCount() const {
+        uint64_t total = 0;
+        for (const auto& [instrIdx, count] : histogram) {
+            total += count;
+        }
+        return total;
+    }
+
+    /* Returns the number of distinct instruction indices recorded. */
+    size_t getUniqueInstructionCount() const {
+        return histogram.size();
+    }
+
+    /* Returns true when no instruction has been recorded. */
+    bool isEmpty() const {
+        return histogram.empty();
+    }
+
+    /* Removes all entries; id and name are left untouched. */
+    void clear() {
+        histogram.clear();
+    }
+
+    /*
+     * Renders the histogram as text:
+     *   Kernel: <name> (ID: <id>)
+     *   <instr_idx>: <count>
+     *   ...
+     * with one line per entry, in ascending instruction-index order.
+     */
+    std::string serialize() const {
+        std::stringstream ss;
+        ss << "Kernel: " << name << " (ID: " << id << ")" << '\n';
+        for (const auto &[instr_idx, count] : histogram) {
+            ss << instr_idx << ": " << count << '\n';
+        }
+        return ss.str();
+    }
+
+    /*
+     * Rebuilds the histogram from text in the serialize() format. Existing
+     * entries are discarded first. The first line is always consumed as the
+     * header; when it is malformed or its ID does not fit in 32 bits, name
+     * and id keep their previous values. Entry lines that do not match
+     * "<instr_idx>: <count>", or whose numbers do not fit in 32 bits, are
+     * skipped. This function does not throw on malformed numbers.
+     */
+    void deserialize(const std::string& data) {
+        // Compiled once and reused across calls.
+        static const std::regex headerPattern(R"(Kernel:\s*(.+?)\s*\(ID:\s*(\d+)\))");
+        static const std::regex instructionPattern(R"(\s*(\d+)\s*:\s*(\d+)\s*)");
+
+        std::stringstream ss(data);
+        std::string line;
+
+        histogram.clear();
+
+        // Header line: "Kernel: <name> (ID: <id>)"
+        if (std::getline(ss, line)) {
+            std::smatch headerMatch;
+            uint32_t parsedId = 0;
+            if (std::regex_match(line, headerMatch, headerPattern) &&
+                parseUint32(headerMatch[2].str(), parsedId)) {
+                name = headerMatch[1].str();
+                id = parsedId;
+            }
+        }
+
+        // Entry lines: "<instr_idx>: <count>"
+        while (std::getline(ss, line)) {
+            if (line.empty()) continue;
+
+            std::smatch instructionMatch;
+            if (!std::regex_match(line, instructionMatch, instructionPattern)) {
+                continue;
+            }
+
+            uint32_t instrIdx = 0;
+            uint32_t count = 0;
+            if (parseUint32(instructionMatch[1].str(), instrIdx) &&
+                parseUint32(instructionMatch[2].str(), count)) {
+                histogram[instrIdx] = count;
+            }
+        }
+    }
+
+    uint32_t id;
+    std::string name;
+    std::map<uint32_t, uint32_t> histogram;
+    // A large 30-bit prime number for hashing to avoid overflow
+    uint32_t hash_prime = 1073741789;
+
+private:
+    /*
+     * Converts a string of decimal digits to a uint32_t. Returns false, and
+     * leaves `out` unchanged, when the text cannot be converted or the value
+     * does not fit in 32 bits.
+     */
+    static bool parseUint32(const std::string& digits, uint32_t& out) {
+        try {
+            const unsigned long long value = std::stoull(digits);
+            if (value > UINT32_MAX) {
+                return false;
+            }
+            out = static_cast<uint32_t>(value);
+            return true;
+        } catch (const std::exception&) {
+            // stoull reports unparsable or out-of-range input by throwing.
+            return false;
+        }
+    }
+};