Skip to content

Commit d4b8e4c

Browse files
committed
[fs_connector][feat]: Add storage_offload CUDA kernel and setup config
Signed-off-by: Kfir Toledo <[email protected]>
1 parent f8bb304 commit d4b8e4c

File tree

3 files changed

+450
-0
lines changed

3 files changed

+450
-0
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Copyright 2025 The llm-d Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from setuptools import setup, find_packages
16+
from torch.utils.cpp_extension import CUDAExtension, BuildExtension
17+
18+
# Build configuration for the `storage_offload` package and its CUDA extension.
# The pieces of the extension definition are named up front so the setup()
# call itself stays short.

# C++ / CUDA translation units compiled into the extension.
_EXTENSION_SOURCES = [
    "src/csrc/storage/storage_offload.cu",
    "src/csrc/storage/buffer.cpp",
    "src/csrc/storage/file_io.cpp",
    "src/csrc/storage/thread_pool.cpp",
    "src/csrc/storage/tensor_copy.cu",
]

# Libraries handed to the linker for the extension.
_LINK_LIBRARIES = ["nvidia-ml", "numa", "cuda"]

# Per-compiler flags; "-Xcompiler" forwards the following flag from nvcc to
# the host compiler.
_COMPILE_ARGS = {
    "cxx": ["-O3", "-std=c++17", "-fopenmp"],
    "nvcc": [
        "-O3",
        "-std=c++17",
        "-Xcompiler",
        "-std=c++17",
        "-Xcompiler",
        "-fopenmp",
    ],
}

_storage_offload_extension = CUDAExtension(
    "storage_offload",
    sources=_EXTENSION_SOURCES,
    libraries=_LINK_LIBRARIES,
    extra_compile_args=_COMPILE_ARGS,
)

setup(
    name="storage_offload",
    packages=find_packages("src"),
    package_dir={"": "src"},
    ext_modules=[_storage_offload_extension],
    cmdclass={"build_ext": BuildExtension},
)
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Copyright 2025 The llm-d Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <cstdlib>
20+
#include <iostream>
21+
#include <string>
22+
#include <chrono>
23+
24+
// -------------------------------------
25+
// Debugging and timing macros
26+
// -------------------------------------
27+
28+
// Debug print - enabled when STORAGE_CONNECTOR_DEBUG is set and not "0".
//
// `msg` is spliced into a `std::cout << ...` chain, so it may itself contain
// `<<` operators, e.g. DEBUG_PRINT("count=" << count). It is evaluated only
// when debugging is enabled. The environment variable is re-read on every use.
//
// The macro-local variable has a deliberately long, unusual name: `msg` is
// expanded inside its scope, so a short name such as `env` would shadow a
// caller variable of the same name that `msg` refers to.
#define DEBUG_PRINT(msg)                                                       \
    do {                                                                       \
        const char* storage_connector_debug_env_ =                             \
            std::getenv("STORAGE_CONNECTOR_DEBUG");                            \
        if (storage_connector_debug_env_ &&                                    \
            std::string(storage_connector_debug_env_) != "0") {                \
            std::cout << "[DEBUG] " << msg << std::endl;                       \
        }                                                                      \
    } while (0)
34+
35+
// Timing macro - measures execution time when STORAGE_CONNECTOR_DEBUG is set
// and not "0".
//
// Evaluates `expr` exactly once and yields its value (by value). When
// debugging is enabled, the elapsed wall time is printed to std::cout as
//   [DEBUG][TIME] <label> took <ms> ms | <info_str>
// `label` and `info_str` are spliced into a `<<` chain and are evaluated only
// when debugging is enabled, after `expr` has run.
//
// Requirements / notes:
//  - `expr` must produce a non-void value; its result is stored in a local.
//  - The expansion is an immediately-invoked `[&]` lambda, so `expr` may refer
//    to any variable visible at the call site.
//  - std::chrono::steady_clock is used because it is monotonic, which is what
//    interval measurement needs.
//  - Macro-local names are long and carry no leading double underscore:
//    `__name` identifiers are reserved for the implementation, and a short
//    name such as `env` would shadow a caller variable used by `expr`.
#define TIME_EXPR(label, expr, info_str)                                       \
    ([&]() {                                                                   \
        const char* storage_connector_debug_env_ =                             \
            std::getenv("STORAGE_CONNECTOR_DEBUG");                            \
        if (!(storage_connector_debug_env_ &&                                  \
              std::string(storage_connector_debug_env_) != "0")) {             \
            return (expr);                                                     \
        }                                                                      \
        const auto storage_connector_t0_ = std::chrono::steady_clock::now();   \
        auto storage_connector_ret_ = (expr);                                  \
        const auto storage_connector_t1_ = std::chrono::steady_clock::now();   \
        const double storage_connector_ms_ =                                   \
            std::chrono::duration<double, std::milli>(                         \
                storage_connector_t1_ - storage_connector_t0_)                 \
                .count();                                                      \
        std::cout << "[DEBUG][TIME] " << label << " took "                     \
                  << storage_connector_ms_ << " ms | " << info_str             \
                  << std::endl;                                                \
        return storage_connector_ret_;                                         \
    })()

0 commit comments

Comments
 (0)