From 45211214499a2e621c0f4e8da87cf6a502ad1789 Mon Sep 17 00:00:00 2001
From: haitwang-cloud <haitao_wht@outlook.com>
Date: Wed, 19 Feb 2025 16:32:52 +0800
Subject: [PATCH 1/5] feat: add initial implementation of nvidia plugin
 components and interfaces

Signed-off-by: haitwang-cloud <haitao_wht@outlook.com>
---
 Makefile                                      |   2 +-
 cmd/device-plugin/nvidia/main.go              | 320 ++++----
 cmd/device-plugin/nvidia/plugin-manager.go    |  91 ++-
 cmd/device-plugin/nvidia/root.go              |  85 ++
 cmd/device-plugin/nvidia/vgpucfg.go           |  14 +-
 docker/Dockerfile                             |   9 +-
 docker/Dockerfile.new                         |  79 ++
 go.mod                                        |  84 +-
 go.sum                                        | 175 ++---
 .../nvidiadevice/nvinternal/cdi/api.go        |  41 -
 .../nvidiadevice/nvinternal/cdi/cdi.go        | 193 -----
 .../nvidiadevice/nvinternal/cdi/factory.go    |  52 --
 .../nvidiadevice/nvinternal/cdi/null.go       |  59 --
 .../nvidiadevice/nvinternal/cdi/options.go    | 103 ---
 .../nvidiadevice/nvinternal/info/version.go   |  64 --
 .../nvidiadevice/nvinternal/plugin/api.go     |  42 -
 .../nvinternal/plugin/manager/api.go          |  41 -
 .../nvinternal/plugin/manager/factory.go      | 152 ----
 .../nvinternal/plugin/manager/null.go         |  49 --
 .../nvinternal/plugin/manager/nvml.go         |  61 --
 .../nvinternal/plugin/manager/options.go      |  84 --
 .../nvinternal/plugin/manager/tegra.go        |  61 --
 .../nvidiadevice/nvinternal/plugin/server.go  | 671 ----------------
 .../nvinternal/plugin/util_test.go            | 156 ----
 .../nvidiadevice/nvinternal/rm/allocate.go    | 137 ----
 .../nvinternal/rm/device_map_test.go          | 583 --------------
 .../nvidiadevice/nvinternal/rm/health_test.go | 100 ---
 .../nvidiadevice/nvinternal/rm/helper.go      |  54 --
 .../nvinternal/rm/nvml_devices_test.go        | 179 -----
 .../nvinternal/rm/nvml_manager.go             | 120 ---
 .../nvidiadevice/nvinternal/rm/rm.go          | 176 -----
 .../nvidiadevice/nvinternal/rm/wsl_devices.go |  52 --
 pkg/device/nvidia/device.go                   |   8 +-
 pkg/nvidia-plugin/api/config/v1/config.go     | 160 ++++
 pkg/nvidia-plugin/api/config/v1/consts.go     |  72 ++
 pkg/nvidia-plugin/api/config/v1/duration.go   |  69 ++
 pkg/nvidia-plugin/api/config/v1/flags.go      | 190 +++++
 pkg/nvidia-plugin/api/config/v1/flags_test.go | 246 ++++++
 pkg/nvidia-plugin/api/config/v1/imex.go       |  53 ++
 pkg/nvidia-plugin/api/config/v1/imex_test.go  |  83 ++
 pkg/nvidia-plugin/api/config/v1/replicas.go   | 355 +++++++++
 .../api/config/v1/replicas_test.go            | 482 ++++++++++++
 pkg/nvidia-plugin/api/config/v1/resources.go  | 196 +++++
 pkg/nvidia-plugin/api/config/v1/sharing.go    |  69 ++
 pkg/nvidia-plugin/api/config/v1/strategy.go   |  69 ++
 pkg/nvidia-plugin/mps-control-daemon/main.go  | 255 ++++++
 .../mps-control-daemon/mount/mount-shm.go     | 108 +++
 .../mps-control-daemon/mps/daemon.go          | 280 +++++++
 .../mps-control-daemon/mps/device.go          |  55 ++
 .../mps-control-daemon/mps/device_test.go     | 112 +++
 .../mps-control-daemon/mps/log-tailer.go      |  69 ++
 .../mps-control-daemon/mps/manager.go         | 112 +++
 .../mps-control-daemon/mps/options.go         |  29 +
 .../mps-control-daemon/mps/root.go            |  59 ++
 pkg/nvidia-plugin/pkg/cdi/api.go              |  31 +
 .../pkg}/cdi/api_mock.go                      |   0
 pkg/nvidia-plugin/pkg/cdi/cdi.go              | 231 ++++++
 pkg/nvidia-plugin/pkg/cdi/imex.go             |  63 ++
 pkg/nvidia-plugin/pkg/cdi/null.go             |  43 +
 pkg/nvidia-plugin/pkg/cdi/options.go          | 102 +++
 pkg/nvidia-plugin/pkg/cuda/api.go             | 119 +++
 pkg/nvidia-plugin/pkg/cuda/cgo_helpers.go     |  27 +
 pkg/nvidia-plugin/pkg/cuda/consts.go          |  95 +++
 pkg/nvidia-plugin/pkg/cuda/cuda.go            | 176 +++++
 pkg/nvidia-plugin/pkg/cuda/device.go          |  17 +
 pkg/nvidia-plugin/pkg/cuda/result.go          | 178 +++++
 .../pkg/dependencies/dependencies.go          |   7 +
 pkg/nvidia-plugin/pkg/flags/kubeclient.go     | 114 +++
 pkg/nvidia-plugin/pkg/flags/node.go           |  46 ++
 pkg/nvidia-plugin/pkg/imex/imex.go            |  98 +++
 pkg/nvidia-plugin/pkg/info/version.go         |  48 ++
 pkg/nvidia-plugin/pkg/lm/empty.go             |  24 +
 pkg/nvidia-plugin/pkg/lm/imex.go              | 182 +++++
 pkg/nvidia-plugin/pkg/lm/imex_test.go         |  57 ++
 pkg/nvidia-plugin/pkg/lm/labeler.go           |  45 ++
 pkg/nvidia-plugin/pkg/lm/labels.go            |  25 +
 pkg/nvidia-plugin/pkg/lm/list.go              |  46 ++
 pkg/nvidia-plugin/pkg/lm/machine-type.go      |  53 ++
 pkg/nvidia-plugin/pkg/lm/mig-strategy.go      | 311 ++++++++
 pkg/nvidia-plugin/pkg/lm/mig-strategy_test.go | 422 ++++++++++
 pkg/nvidia-plugin/pkg/lm/nvml.go              | 262 +++++++
 pkg/nvidia-plugin/pkg/lm/nvml_test.go         | 292 +++++++
 pkg/nvidia-plugin/pkg/lm/output.go            | 155 ++++
 pkg/nvidia-plugin/pkg/lm/resource.go          | 319 ++++++++
 pkg/nvidia-plugin/pkg/lm/resource_test.go     | 437 +++++++++++
 pkg/nvidia-plugin/pkg/lm/strategy.go          |  28 +
 pkg/nvidia-plugin/pkg/lm/timestamp.go         |  37 +
 pkg/nvidia-plugin/pkg/lm/vgpu.go              |  58 ++
 pkg/nvidia-plugin/pkg/logger/klog.go          |  34 +
 .../pkg/mig/mig-dp.go}                        |  34 +-
 pkg/nvidia-plugin/pkg/mig/mig.go              | 124 +++
 pkg/nvidia-plugin/pkg/plugin/api.go           |  26 +
 pkg/nvidia-plugin/pkg/plugin/factory.go       | 138 ++++
 pkg/nvidia-plugin/pkg/plugin/mps.go           |  91 +++
 pkg/nvidia-plugin/pkg/plugin/options.go       |  79 ++
 .../pkg}/plugin/register.go                   |   2 +-
 .../pkg}/plugin/register_test.go              |  14 +-
 pkg/nvidia-plugin/pkg/plugin/server.go        | 742 ++++++++++++++++++
 .../pkg}/plugin/server_test.go                | 121 ++-
 .../pkg}/plugin/util.go                       |   2 +-
 pkg/nvidia-plugin/pkg/resource/cuda-device.go | 110 +++
 pkg/nvidia-plugin/pkg/resource/cuda-lib.go    |  88 +++
 pkg/nvidia-plugin/pkg/resource/device_mock.go | 437 +++++++++++
 pkg/nvidia-plugin/pkg/resource/factory.go     |  84 ++
 pkg/nvidia-plugin/pkg/resource/fallback.go    |  64 ++
 .../pkg/resource/fallback_test.go             |  62 ++
 .../pkg/resource/manager_mock.go              | 215 +++++
 pkg/nvidia-plugin/pkg/resource/null.go        |  57 ++
 pkg/nvidia-plugin/pkg/resource/nvml-device.go | 119 +++
 pkg/nvidia-plugin/pkg/resource/nvml-lib.go    |  94 +++
 .../pkg/resource/nvml-mig-device.go           | 152 ++++
 .../pkg/resource/sysfs-device.go              |  77 ++
 pkg/nvidia-plugin/pkg/resource/sysfs-lib.go   |  74 ++
 .../pkg/resource/testing/resource-testing.go  | 141 ++++
 pkg/nvidia-plugin/pkg/resource/types.go       |  45 ++
 pkg/nvidia-plugin/pkg/rm/allocate.go          |  80 ++
 .../pkg}/rm/device_map.go                     | 125 +--
 pkg/nvidia-plugin/pkg/rm/device_map_test.go   | 109 +++
 .../pkg}/rm/devices.go                        | 117 +--
 .../pkg}/rm/health.go                         |  73 +-
 pkg/nvidia-plugin/pkg/rm/health_test.go       |  74 ++
 pkg/nvidia-plugin/pkg/rm/helper.go            |  32 +
 .../pkg}/rm/nvml_devices.go                   |  89 ++-
 pkg/nvidia-plugin/pkg/rm/nvml_manager.go      | 138 ++++
 pkg/nvidia-plugin/pkg/rm/rm.go                | 138 ++++
 pkg/nvidia-plugin/pkg/rm/rm_test.go           | 195 +++++
 .../pkg}/rm/tegra_devices.go                  |  56 +-
 .../pkg}/rm/tegra_manager.go                  |  50 +-
 pkg/nvidia-plugin/pkg/rm/wsl_devices.go       |  46 ++
 pkg/nvidia-plugin/pkg/vgpu/pciutil.go         | 204 +++++
 pkg/nvidia-plugin/pkg/vgpu/pciutil_test.go    |  42 +
 pkg/nvidia-plugin/pkg/vgpu/vgpu.go            | 153 ++++
 pkg/nvidia-plugin/pkg/vgpu/vgpu_test.go       |  74 ++
 pkg/nvidia-plugin/pkg/watch/watchers.go       |  49 ++
 version.mk                                    |  32 +-
 135 files changed, 12510 insertions(+), 3960 deletions(-)
 create mode 100644 cmd/device-plugin/nvidia/root.go
 create mode 100644 docker/Dockerfile.new
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/cdi/api.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/cdi/cdi.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/cdi/factory.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/cdi/null.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/cdi/options.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/info/version.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/plugin/api.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/api.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/factory.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/null.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/nvml.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/options.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/tegra.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/plugin/util_test.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/rm/allocate.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/rm/device_map_test.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/rm/health_test.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/rm/helper.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_devices_test.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_manager.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/rm/rm.go
 delete mode 100644 pkg/device-plugin/nvidiadevice/nvinternal/rm/wsl_devices.go
 create mode 100644 pkg/nvidia-plugin/api/config/v1/config.go
 create mode 100644 pkg/nvidia-plugin/api/config/v1/consts.go
 create mode 100644 pkg/nvidia-plugin/api/config/v1/duration.go
 create mode 100644 pkg/nvidia-plugin/api/config/v1/flags.go
 create mode 100644 pkg/nvidia-plugin/api/config/v1/flags_test.go
 create mode 100644 pkg/nvidia-plugin/api/config/v1/imex.go
 create mode 100644 pkg/nvidia-plugin/api/config/v1/imex_test.go
 create mode 100644 pkg/nvidia-plugin/api/config/v1/replicas.go
 create mode 100644 pkg/nvidia-plugin/api/config/v1/replicas_test.go
 create mode 100644 pkg/nvidia-plugin/api/config/v1/resources.go
 create mode 100644 pkg/nvidia-plugin/api/config/v1/sharing.go
 create mode 100644 pkg/nvidia-plugin/api/config/v1/strategy.go
 create mode 100644 pkg/nvidia-plugin/mps-control-daemon/main.go
 create mode 100644 pkg/nvidia-plugin/mps-control-daemon/mount/mount-shm.go
 create mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/daemon.go
 create mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/device.go
 create mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/device_test.go
 create mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/log-tailer.go
 create mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/manager.go
 create mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/options.go
 create mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/root.go
 create mode 100644 pkg/nvidia-plugin/pkg/cdi/api.go
 rename pkg/{device-plugin/nvidiadevice/nvinternal => nvidia-plugin/pkg}/cdi/api_mock.go (100%)
 create mode 100644 pkg/nvidia-plugin/pkg/cdi/cdi.go
 create mode 100644 pkg/nvidia-plugin/pkg/cdi/imex.go
 create mode 100644 pkg/nvidia-plugin/pkg/cdi/null.go
 create mode 100644 pkg/nvidia-plugin/pkg/cdi/options.go
 create mode 100644 pkg/nvidia-plugin/pkg/cuda/api.go
 create mode 100644 pkg/nvidia-plugin/pkg/cuda/cgo_helpers.go
 create mode 100644 pkg/nvidia-plugin/pkg/cuda/consts.go
 create mode 100644 pkg/nvidia-plugin/pkg/cuda/cuda.go
 create mode 100644 pkg/nvidia-plugin/pkg/cuda/device.go
 create mode 100644 pkg/nvidia-plugin/pkg/cuda/result.go
 create mode 100644 pkg/nvidia-plugin/pkg/dependencies/dependencies.go
 create mode 100644 pkg/nvidia-plugin/pkg/flags/kubeclient.go
 create mode 100644 pkg/nvidia-plugin/pkg/flags/node.go
 create mode 100644 pkg/nvidia-plugin/pkg/imex/imex.go
 create mode 100644 pkg/nvidia-plugin/pkg/info/version.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/empty.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/imex.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/imex_test.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/labeler.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/labels.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/list.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/machine-type.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/mig-strategy.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/mig-strategy_test.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/nvml.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/nvml_test.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/output.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/resource.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/resource_test.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/strategy.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/timestamp.go
 create mode 100644 pkg/nvidia-plugin/pkg/lm/vgpu.go
 create mode 100644 pkg/nvidia-plugin/pkg/logger/klog.go
 rename pkg/{device-plugin/nvidiadevice/nvinternal/mig/mig.go => nvidia-plugin/pkg/mig/mig-dp.go} (70%)
 create mode 100644 pkg/nvidia-plugin/pkg/mig/mig.go
 create mode 100644 pkg/nvidia-plugin/pkg/plugin/api.go
 create mode 100644 pkg/nvidia-plugin/pkg/plugin/factory.go
 create mode 100644 pkg/nvidia-plugin/pkg/plugin/mps.go
 create mode 100644 pkg/nvidia-plugin/pkg/plugin/options.go
 rename pkg/{device-plugin/nvidiadevice/nvinternal => nvidia-plugin/pkg}/plugin/register.go (98%)
 rename pkg/{device-plugin/nvidiadevice/nvinternal => nvidia-plugin/pkg}/plugin/register_test.go (91%)
 create mode 100644 pkg/nvidia-plugin/pkg/plugin/server.go
 rename pkg/{device-plugin/nvidiadevice/nvinternal => nvidia-plugin/pkg}/plugin/server_test.go (58%)
 rename pkg/{device-plugin/nvidiadevice/nvinternal => nvidia-plugin/pkg}/plugin/util.go (99%)
 create mode 100644 pkg/nvidia-plugin/pkg/resource/cuda-device.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/cuda-lib.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/device_mock.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/factory.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/fallback.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/fallback_test.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/manager_mock.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/null.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/nvml-device.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/nvml-lib.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/nvml-mig-device.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/sysfs-device.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/sysfs-lib.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/testing/resource-testing.go
 create mode 100644 pkg/nvidia-plugin/pkg/resource/types.go
 create mode 100644 pkg/nvidia-plugin/pkg/rm/allocate.go
 rename pkg/{device-plugin/nvidiadevice/nvinternal => nvidia-plugin/pkg}/rm/device_map.go (73%)
 create mode 100644 pkg/nvidia-plugin/pkg/rm/device_map_test.go
 rename pkg/{device-plugin/nvidiadevice/nvinternal => nvidia-plugin/pkg}/rm/devices.go (71%)
 rename pkg/{device-plugin/nvidiadevice/nvinternal => nvidia-plugin/pkg}/rm/health.go (81%)
 create mode 100644 pkg/nvidia-plugin/pkg/rm/health_test.go
 create mode 100644 pkg/nvidia-plugin/pkg/rm/helper.go
 rename pkg/{device-plugin/nvidiadevice/nvinternal => nvidia-plugin/pkg}/rm/nvml_devices.go (65%)
 create mode 100644 pkg/nvidia-plugin/pkg/rm/nvml_manager.go
 create mode 100644 pkg/nvidia-plugin/pkg/rm/rm.go
 create mode 100644 pkg/nvidia-plugin/pkg/rm/rm_test.go
 rename pkg/{device-plugin/nvidiadevice/nvinternal => nvidia-plugin/pkg}/rm/tegra_devices.go (53%)
 rename pkg/{device-plugin/nvidiadevice/nvinternal => nvidia-plugin/pkg}/rm/tegra_manager.go (55%)
 create mode 100644 pkg/nvidia-plugin/pkg/rm/wsl_devices.go
 create mode 100644 pkg/nvidia-plugin/pkg/vgpu/pciutil.go
 create mode 100644 pkg/nvidia-plugin/pkg/vgpu/pciutil_test.go
 create mode 100644 pkg/nvidia-plugin/pkg/vgpu/vgpu.go
 create mode 100644 pkg/nvidia-plugin/pkg/vgpu/vgpu_test.go
 create mode 100644 pkg/nvidia-plugin/pkg/watch/watchers.go

diff --git a/Makefile b/Makefile
index 3dd3709b2..cf697b9c7 100644
--- a/Makefile
+++ b/Makefile
@@ -7,10 +7,10 @@ docker:
 	docker build \
 	--build-arg GOLANG_IMAGE=${GOLANG_IMAGE} \
 	--build-arg TARGET_ARCH=${TARGET_ARCH} \
+	--build-arg NVIDIA_DEVEL_IMAGE=${NVIDIA_DEVEL_IMAGE} \
 	--build-arg NVIDIA_IMAGE=${NVIDIA_IMAGE} \
 	--build-arg DEST_DIR=${DEST_DIR} \
 	--build-arg VERSION=${VERSION} \
-	--build-arg GOPROXY=https://goproxy.cn,direct \
 	. -f=docker/Dockerfile -t ${IMG_TAG}
 
 dockerwithlib:
diff --git a/cmd/device-plugin/nvidia/main.go b/cmd/device-plugin/nvidia/main.go
index c9d823b4c..cec7923ca 100644
--- a/cmd/device-plugin/nvidia/main.go
+++ b/cmd/device-plugin/nvidia/main.go
@@ -1,73 +1,61 @@
 /*
-Copyright 2024 The HAMi Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 package main
 
 import (
 	"encoding/json"
-	"flag"
+	"errors"
 	"fmt"
 	"os"
+	"path/filepath"
 	"syscall"
 	"time"
 
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/info"
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/plugin"
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/rm"
-	"github.com/Project-HAMi/HAMi/pkg/util"
-	flagutil "github.com/Project-HAMi/HAMi/pkg/util/flag"
-
-	spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
+	nvinfo "github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
 	"github.com/fsnotify/fsnotify"
-	cli "github.com/urfave/cli/v2"
-	errorsutil "k8s.io/apimachinery/pkg/util/errors"
+	"github.com/urfave/cli/v2"
 	"k8s.io/klog/v2"
-	kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/info"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/logger"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/plugin"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/watch"
+	"github.com/Project-HAMi/HAMi/pkg/util"
 )
 
-func main() {
-	var configFile string
+type options struct {
+	flags         []cli.Flag // full CLI flag set (copied from c.Flags in main); passed to loadConfig when plugins are (re)started
+	configFile    string     // destination of the --config-file flag
+	kubeletSocket string     // destination of the --kubelet-socket flag; its directory is watched and it is passed to p.Start
+}
 
+func main() {
 	c := cli.NewApp()
+	o := &options{}
 	c.Name = "NVIDIA Device Plugin"
 	c.Usage = "NVIDIA device plugin for Kubernetes"
+	c.Version = info.GetVersionString()
 	c.Action = func(ctx *cli.Context) error {
-		flagutil.PrintCliFlags(ctx)
-		return start(ctx, c.Flags)
-	}
-	c.Commands = []*cli.Command{
-		{
-			Name:  "version",
-			Usage: "Show the version of NVIDIA Device Plugin",
-			Action: func(c *cli.Context) error {
-				fmt.Printf("%s version: %s\n", c.App.Name, info.GetVersionString())
-				return nil
-			},
-		},
-	}
-
-	flagset := flag.NewFlagSet("klog", flag.ExitOnError)
-	klog.InitFlags(flagset)
-
-	c.Before = func(ctx *cli.Context) error {
-		logLevel := ctx.Int("v")
-		if err := flagset.Set("v", fmt.Sprintf("%d", logLevel)); err != nil {
-			return err
-		}
-		return nil
+		return start(ctx, o)
 	}
 
 	c.Flags = []cli.Flag{
@@ -84,11 +72,18 @@ func main() {
 			EnvVars: []string{"FAIL_ON_INIT_ERROR"},
 		},
 		&cli.StringFlag{
-			Name:    "nvidia-driver-root",
+			Name:    "driver-root",
+			Aliases: []string{"nvidia-driver-root"},
 			Value:   "/",
-			Usage:   "the root path for the NVIDIA driver installation (typical values are '/' or '/run/nvidia/driver')",
+			Usage:   "the root path for the NVIDIA driver installation on the host (typical values are '/' or '/run/nvidia/driver')",
 			EnvVars: []string{"NVIDIA_DRIVER_ROOT"},
 		},
+		&cli.StringFlag{
+			Name:    "dev-root",
+			Aliases: []string{"nvidia-dev-root"},
+			Usage:   "the root path for the NVIDIA device nodes on the host (typical values are '/' or '/run/nvidia/driver')",
+			EnvVars: []string{"NVIDIA_DEV_ROOT"},
+		},
 		&cli.BoolFlag{
 			Name:    "pass-device-specs",
 			Value:   false,
@@ -97,7 +92,7 @@ func main() {
 		},
 		&cli.StringSliceFlag{
 			Name:    "device-list-strategy",
-			Value:   cli.NewStringSlice(string(spec.DeviceListStrategyEnvvar)),
+			Value:   cli.NewStringSlice(string(spec.DeviceListStrategyEnvVar)),
 			Usage:   "the desired strategy for passing the device list to the underlying runtime:\n\t\t[envvar | volume-mounts | cdi-annotations]",
 			EnvVars: []string{"DEVICE_LIST_STRATEGY"},
 		},
@@ -117,10 +112,17 @@ func main() {
 			Usage:   "ensure that containers are started with NVIDIA_MOFED=enabled",
 			EnvVars: []string{"MOFED_ENABLED"},
 		},
+		&cli.StringFlag{
+			Name:        "kubelet-socket",
+			Value:       pluginapi.KubeletSocket,
+			Usage:       "specify the socket for communicating with the kubelet; if this is empty, no connection with the kubelet is attempted",
+			Destination: &o.kubeletSocket,
+			EnvVars:     []string{"KUBELET_SOCKET"},
+		},
 		&cli.StringFlag{
 			Name:        "config-file",
 			Usage:       "the path to a config file as an alternative to command line options or environment variables",
-			Destination: &configFile,
+			Destination: &o.configFile,
 			EnvVars:     []string{"CONFIG_FILE"},
 		},
 		&cli.StringFlag{
@@ -130,24 +132,45 @@ func main() {
 			EnvVars: []string{"CDI_ANNOTATION_PREFIX"},
 		},
 		&cli.StringFlag{
-			Name:    "nvidia-ctk-path",
+			Name:    "nvidia-cdi-hook-path",
+			Aliases: []string{"nvidia-ctk-path"},
 			Value:   spec.DefaultNvidiaCTKPath,
-			Usage:   "the path to use for the nvidia-ctk in the generated CDI specification",
-			EnvVars: []string{"NVIDIA_CTK_PATH"},
+			Usage:   "the path to use for NVIDIA CDI hooks in the generated CDI specification",
+			EnvVars: []string{"NVIDIA_CDI_HOOK_PATH", "NVIDIA_CTK_PATH"},
 		},
 		&cli.StringFlag{
-			Name:    "container-driver-root",
+			Name:    "driver-root-ctr-path",
+			Aliases: []string{"container-driver-root"},
 			Value:   spec.DefaultContainerDriverRoot,
 			Usage:   "the path where the NVIDIA driver root is mounted in the container; used for generating CDI specifications",
-			EnvVars: []string{"CONTAINER_DRIVER_ROOT"},
+			EnvVars: []string{"DRIVER_ROOT_CTR_PATH", "CONTAINER_DRIVER_ROOT"},
+		},
+		&cli.StringFlag{
+			Name:    "mps-root",
+			Usage:   "the path on the host where MPS-specific mounts and files are created by the MPS control daemon manager",
+			EnvVars: []string{"MPS_ROOT"},
+		},
+		&cli.StringFlag{
+			Name:    "device-discovery-strategy",
+			Value:   "auto",
+			Usage:   "the strategy to use to discover devices: 'auto', 'nvml', or 'tegra'",
+			EnvVars: []string{"DEVICE_DISCOVERY_STRATEGY"},
+		},
+		&cli.IntSliceFlag{
+			Name:    "imex-channel-ids",
+			Usage:   "A list of IMEX channels to inject.",
+			EnvVars: []string{"IMEX_CHANNEL_IDS"},
 		},
-		&cli.IntFlag{
-			Name:  "v",
-			Usage: "number for the log level verbosity",
-			Value: 0,
+		&cli.BoolFlag{
+			Name:    "imex-required",
+			Usage:   "The specified IMEX channels are required",
+			EnvVars: []string{"IMEX_REQUIRED"},
 		},
 	}
+	// add extra flags for HAMi
 	c.Flags = append(c.Flags, addFlags()...)
+	o.flags = c.Flags
+
 	err := c.Run(os.Args)
 	if err != nil {
 		klog.Error(err)
@@ -155,15 +178,50 @@ func main() {
 	}
 }
 
-func validateFlags(config *spec.Config) error {
-	_, err := spec.NewDeviceListStrategies(*config.Flags.Plugin.DeviceListStrategy)
+func validateFlags(infolib nvinfo.Interface, config *spec.Config) error {
+	deviceListStrategies, err := spec.NewDeviceListStrategies(*config.Flags.Plugin.DeviceListStrategy)
 	if err != nil {
 		return fmt.Errorf("invalid --device-list-strategy option: %v", err)
 	}
 
+	hasNvml, _ := infolib.HasNvml()
+	if deviceListStrategies.AnyCDIEnabled() && !hasNvml {
+		return fmt.Errorf("CDI --device-list-strategy options are only supported on NVML-based systems")
+	}
+
 	if *config.Flags.Plugin.DeviceIDStrategy != spec.DeviceIDStrategyUUID && *config.Flags.Plugin.DeviceIDStrategy != spec.DeviceIDStrategyIndex {
 		return fmt.Errorf("invalid --device-id-strategy option: %v", *config.Flags.Plugin.DeviceIDStrategy)
 	}
+
+	if config.Sharing.SharingStrategy() == spec.SharingStrategyMPS {
+		if *config.Flags.MigStrategy == spec.MigStrategyMixed {
+			return fmt.Errorf("using --mig-strategy=mixed is not supported with MPS")
+		}
+		if config.Flags.MpsRoot == nil || *config.Flags.MpsRoot == "" {
+			return fmt.Errorf("using MPS requires --mps-root to be specified")
+		}
+	}
+
+	switch *config.Flags.DeviceDiscoveryStrategy {
+	case "auto":
+	case "nvml":
+	case "tegra":
+	default:
+		return fmt.Errorf("invalid --device-discovery-strategy option %v", *config.Flags.DeviceDiscoveryStrategy)
+	}
+
+	switch *config.Flags.MigStrategy {
+	case spec.MigStrategyNone:
+	case spec.MigStrategySingle:
+	case spec.MigStrategyMixed:
+	default:
+		return fmt.Errorf("unknown MIG strategy: %v", *config.Flags.MigStrategy)
+	}
+
+	if err := spec.AssertChannelIDsValid(config.Imex.ChannelIDs); err != nil {
+		return fmt.Errorf("invalid IMEX channel IDs: %w", err)
+	}
+
 	return nil
 }
 
@@ -172,35 +230,38 @@ func loadConfig(c *cli.Context, flags []cli.Flag) (*spec.Config, error) {
 	if err != nil {
 		return nil, fmt.Errorf("unable to finalize config: %v", err)
 	}
-	err = validateFlags(config)
-	if err != nil {
-		return nil, fmt.Errorf("unable to validate flags: %v", err)
-	}
 	config.Flags.GFD = nil
 	return config, nil
 }
 
-func start(c *cli.Context, flags []cli.Flag) error {
-	klog.Info("Starting FS watcher.")
+// start runs the plugin lifecycle loop. It watches the directory that holds
+// the kubelet socket (o.kubeletSocket) as well as OS signals, and (re)starts
+// the plugins when that socket is re-created or a SIGHUP is received.
+func start(c *cli.Context, o *options) error {
+	klog.InfoS(fmt.Sprintf("Starting %s", c.App.Name), "version", c.App.Version)
+
 	util.NodeName = os.Getenv(util.NodeNameEnvName)
-	watcher, err := newFSWatcher(kubeletdevicepluginv1beta1.DevicePluginPath)
+
+	// Watch the socket's parent directory rather than the socket itself: the
+	// restart loop below reacts to a Create event named o.kubeletSocket.
+	kubeletSocketDir := filepath.Dir(o.kubeletSocket)
+	klog.Infof("Starting FS watcher for %v", kubeletSocketDir)
+	watcher, err := watch.Files(kubeletSocketDir)
 	if err != nil {
-		return fmt.Errorf("failed to create FS watcher: %v", err)
+		return fmt.Errorf("failed to create FS watcher for %s: %v", kubeletSocketDir, err)
 	}
 	defer watcher.Close()
-	//device.InitDevices()
 
-	/*Loading config files*/
 	klog.Infof("Start working on node %s", util.NodeName)
 	klog.Info("Starting OS watcher.")
-	sigs := newOSWatcher(syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
+	sigs := watch.Signals(syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
 
-	var restarting bool
+	var started bool
 	var restartTimeout <-chan time.Time
 	var plugins []plugin.Interface
 restart:
 	// If we are restarting, stop plugins from previous run.
-	if restarting {
+	if started {
 		err := stopPlugins(plugins)
 		if err != nil {
 			return fmt.Errorf("error stopping plugins from previous run: %v", err)
@@ -208,18 +269,17 @@ restart:
 	}
 
 	klog.Info("Starting Plugins.")
-	plugins, restartPlugins, err := startPlugins(c, flags, restarting)
+	plugins, restartPlugins, err := startPlugins(c, o)
 	if err != nil {
 		return fmt.Errorf("error starting plugins: %v", err)
 	}
+	started = true
 
 	if restartPlugins {
-		klog.Info("Failed to start one or more plugins. Retrying in 30s...")
+		klog.Infof("Failed to start one or more plugins. Retrying in 30s...")
 		restartTimeout = time.After(30 * time.Second)
 	}
 
-	restarting = true
-
 	// Start an infinite loop, waiting for several indicators to either log
 	// some messages, trigger a restart of the plugins, or exit the program.
 	for {
@@ -229,17 +289,17 @@ restart:
 			goto restart
 
 		// Detect a kubelet restart by watching for a newly created
-		// 'kubeletdevicepluginv1beta1.KubeletSocket' file. When this occurs, restart this loop,
+		// 'o.kubeletSocket' file (default 'pluginapi.KubeletSocket'). When this occurs, restart this loop,
 		// restarting all of the plugins in the process.
 		case event := <-watcher.Events:
-			if event.Name == kubeletdevicepluginv1beta1.KubeletSocket && event.Op&fsnotify.Create == fsnotify.Create {
-				klog.Infof("inotify: %s created, restarting.", kubeletdevicepluginv1beta1.KubeletSocket)
+			if o.kubeletSocket != "" && event.Name == o.kubeletSocket && event.Op&fsnotify.Create == fsnotify.Create {
+				klog.Infof("inotify: %s created, restarting.", o.kubeletSocket)
 				goto restart
 			}
 
 		// Watch for any other fs errors and log them.
 		case err := <-watcher.Errors:
-			klog.Errorf("inotify: %s", err)
+			klog.Infof("inotify: %s", err)
 
 		// Watch for any signals from the OS. On SIGHUP, restart this loop,
 		// restarting all of the plugins in the process. On all other
@@ -263,32 +323,47 @@ exit:
 	return nil
 }
 
-func startPlugins(c *cli.Context, flags []cli.Flag, restarting bool) ([]plugin.Interface, bool, error) {
+func startPlugins(c *cli.Context, o *options) ([]plugin.Interface, bool, error) {
 	// Load the configuration file
 	klog.Info("Loading configuration.")
-	config, err := loadConfig(c, flags)
+	config, err := loadConfig(c, o.flags)
 	if err != nil {
 		return nil, false, fmt.Errorf("unable to load config: %v", err)
 	}
-	disableResourceRenamingInConfig(config)
+	spec.DisableResourceNamingInConfig(logger.ToKlog, config)
 
-	/*Loading config files*/
-	//fmt.Println("NodeName=", config.NodeName)
-	devConfig, err := generateDeviceConfigFromNvidia(config, c, flags)
+	devConfig, err := generateDeviceConfigFromNvidia(config, c, o.flags)
 	if err != nil {
 		klog.Errorf("failed to load config file %s", err.Error())
 		return nil, false, err
 	}
 
+	driverRoot := root(*devConfig.Config.Flags.Plugin.ContainerDriverRoot)
+	// We construct an NVML library specifying the path to libnvidia-ml.so.1
+	// explicitly so that we don't have to rely on the library path.
+	nvmllib := nvml.New(
+		nvml.WithLibraryPath(driverRoot.tryResolveLibrary("libnvidia-ml.so.1")),
+	)
+	devicelib := device.New(nvmllib)
+	infolib := nvinfo.New(
+		nvinfo.WithNvmlLib(nvmllib),
+		nvinfo.WithDeviceLib(devicelib),
+	)
+
+	err = validateFlags(infolib, devConfig.Config)
+	if err != nil {
+		return nil, false, fmt.Errorf("unable to validate flags: %v", err)
+	}
+
 	// Update the configuration file with default resources.
 	klog.Info("Updating config with default resource matching patterns.")
-	err = rm.AddDefaultResourcesToConfig(&devConfig)
+	err = rm.AddDefaultResourcesToConfig(infolib, nvmllib, devicelib, devConfig.Config)
 	if err != nil {
 		return nil, false, fmt.Errorf("unable to add default resources to config: %v", err)
 	}
 
 	// Print the config to the output.
-	configJSON, err := json.MarshalIndent(devConfig, "", "  ")
+	configJSON, err := json.MarshalIndent(devConfig.Config, "", "  ")
 	if err != nil {
 		return nil, false, fmt.Errorf("failed to marshal config to JSON: %v", err)
 	}
@@ -296,11 +371,7 @@ func startPlugins(c *cli.Context, flags []cli.Flag, restarting bool) ([]plugin.I
 
 	// Get the set of plugins.
 	klog.Info("Retrieving plugins.")
-	pluginManager, err := NewPluginManager(&devConfig)
-	if err != nil {
-		return nil, false, fmt.Errorf("error creating plugin manager: %v", err)
-	}
-	plugins, err := pluginManager.GetPlugins()
+	plugins, err := GetPlugins(infolib, nvmllib, devicelib, devConfig)
 	if err != nil {
 		return nil, false, fmt.Errorf("error getting plugins: %v", err)
 	}
@@ -316,10 +387,8 @@ func startPlugins(c *cli.Context, flags []cli.Flag, restarting bool) ([]plugin.I
 		}
 
 		// Start the gRPC server for plugin p and connect it with the kubelet.
-		if err := p.Start(); err != nil {
-			klog.Error("Could not contact Kubelet. Did you enable the device plugin feature gate?")
-			klog.Error("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites")
-			klog.Error("You can learn how to set the runtime at: https://github.com/NVIDIA/k8s-device-plugin#quick-start")
+		if err := p.Start(o.kubeletSocket); err != nil {
+			klog.Errorf("Failed to start plugin: %v", err)
 			return plugins, true, nil
 		}
 		started++
@@ -334,48 +403,11 @@ func startPlugins(c *cli.Context, flags []cli.Flag, restarting bool) ([]plugin.I
 
+// stopPlugins calls Stop on every plugin in order and returns all resulting
+// errors combined with errors.Join (nil when no Stop call fails).
 func stopPlugins(plugins []plugin.Interface) error {
 	klog.Info("Stopping plugins.")
-	errs := []error{}
-	for _, p := range plugins {
-		err := p.Stop()
-		errs = append(errs, err)
-	}
-	return errorsutil.NewAggregate(errs)
-}
-
-// disableResourceRenamingInConfig temporarily disable the resource renaming feature of the plugin.
-// We plan to reeenable this feature in a future release.
-func disableResourceRenamingInConfig(config *spec.Config) {
-	// Disable resource renaming through config.Resource
-	if len(config.Resources.GPUs) > 0 || len(config.Resources.MIGs) > 0 {
-		klog.Infof("Customizing the 'resources' field is not yet supported in the config. Ignoring...")
-	}
-	config.Resources.GPUs = nil
-	config.Resources.MIGs = nil
-
-	// Disable renaming / device selection in Sharing.TimeSlicing.Resources
-	renameByDefault := config.Sharing.TimeSlicing.RenameByDefault
-	setsNonDefaultRename := false
-	setsDevices := false
-	for i, r := range config.Sharing.TimeSlicing.Resources {
-		if !renameByDefault && r.Rename != "" {
-			setsNonDefaultRename = true
-			config.Sharing.TimeSlicing.Resources[i].Rename = ""
-		}
-		if renameByDefault && r.Rename != r.Name.DefaultSharedRename() {
-			setsNonDefaultRename = true
-			config.Sharing.TimeSlicing.Resources[i].Rename = r.Name.DefaultSharedRename()
-		}
-		if !r.Devices.All {
-			setsDevices = true
-			config.Sharing.TimeSlicing.Resources[i].Devices.All = true
-			config.Sharing.TimeSlicing.Resources[i].Devices.Count = 0
-			config.Sharing.TimeSlicing.Resources[i].Devices.List = nil
-		}
-	}
-	if setsNonDefaultRename {
-		klog.Warning("Setting the 'rename' field in sharing.timeSlicing.resources is not yet supported in the config. Ignoring...")
-	}
-	if setsDevices {
-		klog.Warning("Customizing the 'devices' field in sharing.timeSlicing.resources is not yet supported in the config. Ignoring...")
+	var combined error
+	for i := range plugins {
+		combined = errors.Join(combined, plugins[i].Stop())
 	}
+	return combined
 }
diff --git a/cmd/device-plugin/nvidia/plugin-manager.go b/cmd/device-plugin/nvidia/plugin-manager.go
index 237f6b759..02c435b17 100644
--- a/cmd/device-plugin/nvidia/plugin-manager.go
+++ b/cmd/device-plugin/nvidia/plugin-manager.go
@@ -1,82 +1,84 @@
 /*
-Copyright 2024 The HAMi Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
+ * Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 package main
 
 import (
 	"fmt"
 
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/cdi"
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager"
-	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
 
-	"github.com/NVIDIA/go-nvlib/pkg/nvml"
-	spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/cdi"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/imex"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/plugin"
 )
 
-// NewPluginManager creates an NVML-based plugin manager.
-func NewPluginManager(config *nvidia.DeviceConfig) (manager.Interface, error) {
-	var err error
-	switch *config.Flags.MigStrategy {
-	case spec.MigStrategyNone:
-	case spec.MigStrategySingle:
-	case spec.MigStrategyMixed:
-	default:
-		return nil, fmt.Errorf("unknown strategy: %v", *config.Flags.MigStrategy)
-	}
-
-	nvmllib := nvml.New()
+// GetPlugins returns a set of plugins for the specified configuration.
+// It derives the device list strategies and IMEX channels from config, builds
+// a CDI handler and the plugins from them, and writes the CDI spec file
+// before returning. Any failure along the way is returned as a wrapped error.
+func GetPlugins(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, config *nvidia.DeviceConfig) ([]plugin.Interface, error) {
+	// TODO: We could consider passing this as an argument since it should already be used to construct nvmllib.
+	driverRoot := root(*config.Flags.Plugin.ContainerDriverRoot)
 
 	deviceListStrategies, err := spec.NewDeviceListStrategies(*config.Flags.Plugin.DeviceListStrategy)
 	if err != nil {
-		return nil, fmt.Errorf("invalid device list strategy: %v", err)
+		return nil, fmt.Errorf("invalid device list strategy: %w", err)
 	}
 
-	cdiEnabled := deviceListStrategies.IsCDIEnabled()
+	imexChannels, err := imex.GetChannels(config.Config, driverRoot.getDevRoot())
+	if err != nil {
+		return nil, fmt.Errorf("error querying IMEX channels: %w", err)
+	}
 
-	cdiHandler, err := cdi.New(
-		cdi.WithEnabled(cdiEnabled),
-		cdi.WithDriverRoot(*config.Flags.Plugin.ContainerDriverRoot),
+	cdiHandler, err := cdi.New(infolib, nvmllib, devicelib,
+		cdi.WithDeviceListStrategies(deviceListStrategies),
+		cdi.WithDriverRoot(string(driverRoot)),
+		cdi.WithDevRoot(driverRoot.getDevRoot()),
 		cdi.WithTargetDriverRoot(*config.Flags.NvidiaDriverRoot),
+		cdi.WithTargetDevRoot(*config.Flags.NvidiaDevRoot),
 		cdi.WithNvidiaCTKPath(*config.Flags.Plugin.NvidiaCTKPath),
-		cdi.WithNvml(nvmllib),
 		cdi.WithDeviceIDStrategy(*config.Flags.Plugin.DeviceIDStrategy),
 		cdi.WithVendor("k8s.device-plugin.nvidia.com"),
 		cdi.WithGdsEnabled(*config.Flags.GDSEnabled),
 		cdi.WithMofedEnabled(*config.Flags.MOFEDEnabled),
+		cdi.WithImexChannels(imexChannels),
 	)
 	if err != nil {
-		return nil, fmt.Errorf("unable to create cdi handler: %v", err)
+		return nil, fmt.Errorf("unable to create cdi handler: %w", err)
 	}
 
-	m, err := manager.New(
-		manager.WithNVML(nvmllib),
-		manager.WithCDIEnabled(cdiEnabled),
-		manager.WithCDIHandler(cdiHandler),
-		manager.WithConfig(config),
-		manager.WithFailOnInitError(*config.Flags.FailOnInitError),
-		manager.WithMigStrategy(*config.Flags.MigStrategy),
+	plugins, err := plugin.New(infolib, nvmllib, devicelib,
+		plugin.WithCDIHandler(cdiHandler),
+		plugin.WithConfig(config),
+		plugin.WithDeviceListStrategies(deviceListStrategies),
+		plugin.WithFailOnInitError(*config.Flags.FailOnInitError),
+		plugin.WithImexChannels(imexChannels),
 	)
 	if err != nil {
-		return nil, fmt.Errorf("unable to create plugin manager: %v", err)
+		return nil, fmt.Errorf("unable to create plugins: %w", err)
 	}
 
-	if err := m.CreateCDISpecFile(); err != nil {
-		return nil, fmt.Errorf("unable to create cdi spec file: %v", err)
+	if err := cdiHandler.CreateSpecFile(); err != nil {
+		return nil, fmt.Errorf("unable to create cdi spec file: %w", err)
 	}
 
-	return m, nil
+	return plugins, nil
 }
diff --git a/cmd/device-plugin/nvidia/root.go b/cmd/device-plugin/nvidia/root.go
new file mode 100644
index 000000000..db9cec76e
--- /dev/null
+++ b/cmd/device-plugin/nvidia/root.go
@@ -0,0 +1,89 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package main
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+// root is a filesystem path that other paths are resolved against.
+type root string
+
+// join appends parts to the root using filepath.Join and returns the result.
+func (r root) join(parts ...string) string {
+	elems := make([]string, 0, len(parts)+1)
+	elems = append(elems, string(r))
+	elems = append(elems, parts...)
+	return filepath.Join(elems...)
+}
+
+// getDevRoot returns the root itself when it contains a dev directory and
+// falls back to "/" otherwise.
+func (r root) getDevRoot() string {
+	if !r.isDevRoot() {
+		return "/"
+	}
+	return string(r)
+}
+
+// isDevRoot reports whether <root>/dev exists and is a directory.
+// Any os.Stat failure is treated as "not a dev root".
+func (r root) isDevRoot() bool {
+	info, err := os.Stat(filepath.Join(string(r), "dev"))
+	return err == nil && info.IsDir()
+}
+
+// tryResolveLibrary looks for libraryName in a fixed list of library
+// directories below the root and returns the symlink-resolved path of the
+// first match. The bare libraryName is returned when the root is empty or
+// "/", or when no candidate can be resolved.
+func (r root) tryResolveLibrary(libraryName string) string {
+	switch r {
+	case "", "/":
+		return libraryName
+	}
+
+	candidateDirs := []string{
+		"/usr/lib64",
+		"/usr/lib/x86_64-linux-gnu",
+		"/usr/lib/aarch64-linux-gnu",
+		"/lib64",
+		"/lib/x86_64-linux-gnu",
+		"/lib/aarch64-linux-gnu",
+	}
+
+	for _, dir := range candidateDirs {
+		if target, err := resolveLink(r.join(dir, libraryName)); err == nil {
+			return target
+		}
+	}
+
+	return libraryName
+}
+
+// resolveLink returns the target of the symlink l, or l itself when it is a
+// regular file; resolution failures are returned as a wrapped error.
+// This is equivalent to running `readlink -f ${l}`.
+func resolveLink(l string) (string, error) {
+	target, err := filepath.EvalSymlinks(l)
+	if err == nil {
+		return target, nil
+	}
+	return "", fmt.Errorf("error resolving link '%v': %w", l, err)
+}
diff --git a/cmd/device-plugin/nvidia/vgpucfg.go b/cmd/device-plugin/nvidia/vgpucfg.go
index a6d2a8307..15c523c99 100644
--- a/cmd/device-plugin/nvidia/vgpucfg.go
+++ b/cmd/device-plugin/nvidia/vgpucfg.go
@@ -21,14 +21,14 @@ import (
 	"os"
 	"strings"
 
+	cli "github.com/urfave/cli/v2"
+	"k8s.io/klog/v2"
+
 	"github.com/Project-HAMi/HAMi/pkg/device"
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/plugin"
 	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/plugin"
 	"github.com/Project-HAMi/HAMi/pkg/util"
-
-	spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
-	cli "github.com/urfave/cli/v2"
-	"k8s.io/klog/v2"
 )
 
 func addFlags() []cli.Flag {
@@ -97,8 +97,8 @@ func updateFromCLIFlag[T any](pflag **T, c *cli.Context, flagName string) {
 	}
 }
 
-func generateDeviceConfigFromNvidia(cfg *spec.Config, c *cli.Context, flags []cli.Flag) (nvidia.DeviceConfig, error) {
-	devcfg := nvidia.DeviceConfig{}
+func generateDeviceConfigFromNvidia(cfg *spec.Config, c *cli.Context, flags []cli.Flag) (*nvidia.DeviceConfig, error) {
+	devcfg := &nvidia.DeviceConfig{}
 	devcfg.Config = cfg
 
 	klog.Infoln("flags=", flags)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index addf74a25..e513b9fe2 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,5 +1,6 @@
-ARG GOLANG_IMAGE=golang:1.22.5-bullseye
-ARG NVIDIA_IMAGE=nvidia/cuda:12.2.0-devel-ubuntu20.04
+ARG GOLANG_IMAGE
+ARG NVIDIA_IMAGE
+ARG NVIDIA_DEVEL_IMAGE
 
 FROM $GOLANG_IMAGE AS build
 FROM $GOLANG_IMAGE AS gobuild
@@ -11,14 +12,14 @@ ADD . /k8s-vgpu
 RUN cd /k8s-vgpu && make all VERSION=$VERSION
 RUN go install github.com/NVIDIA/mig-parted/cmd/nvidia-mig-parted@v0.10.0
 
-FROM $NVIDIA_IMAGE AS nvbuild
+FROM $NVIDIA_DEVEL_IMAGE AS nvbuild
 COPY ./libvgpu /libvgpu
 WORKDIR /libvgpu
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get -y update; apt-get -y install cmake
 RUN bash ./build.sh
 
-FROM nvidia/cuda:12.6.3-base-ubuntu22.04
+FROM $NVIDIA_IMAGE
 ENV NVIDIA_DISABLE_REQUIRE="true"
 ENV NVIDIA_VISIBLE_DEVICES=all
 ENV NVIDIA_DRIVER_CAPABILITIES=utility
diff --git a/docker/Dockerfile.new b/docker/Dockerfile.new
new file mode 100644
index 000000000..33e8bcfb3
--- /dev/null
+++ b/docker/Dockerfile.new
@@ -0,0 +1,90 @@
+# Base image upgrade to UBI 9 with CUDA 12.6.3
+ARG GOLANG_VERSION=1.22.6
+FROM nvcr.io/nvidia/cuda:12.6.3-base-ubi9 AS build
+
+# An ARG declared before the first FROM is not visible inside a stage unless
+# it is re-declared there; without this the Go download URL has no version.
+ARG GOLANG_VERSION
+
+# Install essential build tools
+RUN yum install -y \
+    wget make git gcc \
+     && \
+    rm -rf /var/cache/yum/*
+
+# Install Go manually for better version control
+RUN set -eux; \
+    arch="$(uname -m)"; \
+    case "${arch##*-}" in \
+        x86_64 | amd64) ARCH='amd64' ;; \
+        aarch64) ARCH='arm64' ;; \
+        *) echo "unsupported architecture" ; exit 1 ;; \
+    esac; \
+    wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
+    | tar -C /usr/local -xz
+
+ENV GOPATH=/go
+ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
+
+# Main build process
+WORKDIR /build
+COPY . .
+RUN mkdir /artifacts
+ARG VERSION="N/A"
+ARG GIT_COMMIT="unknown"
+
+# Core components build
+# The sources were copied into WORKDIR (/build) above; /k8s-vgpu does not exist
+# in this stage, so run make from the working directory.
+# NOTE(review): assumes the Makefile honours PREFIX for its output directory - confirm.
+RUN make all VERSION=$VERSION PREFIX=/artifacts
+
+# Install NVIDIA MIG tool
+RUN go install github.com/NVIDIA/mig-parted/cmd/nvidia-mig-parted@v0.10.0
+
+# GPU library build stage
+FROM nvidia/cuda:12.2.0-devel-ubuntu20.04 AS nvbuild
+COPY ./libvgpu /libvgpu
+WORKDIR /libvgpu
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get -y update && apt-get -y install cmake
+RUN bash ./build.sh
+
+# Image cleanup stage
+FROM redhat/ubi9-minimal:latest AS minimal
+RUN rpm -qa --queryformat='^%{NAME}-\[0-9\].*\.%{ARCH}$\n' | sort -u > /tmp/package-names.minimal
+
+FROM nvcr.io/nvidia/cuda:12.6.3-base-ubi9 AS base
+WORKDIR /cleanup
+COPY --from=minimal /tmp/package-names.minimal .
+COPY deployments/container/cleanup/* .
+RUN ./cleanup.sh
+
+# Final image composition
+FROM base
+# Build args do not carry over between stages; re-declare the ones used by
+# LABEL and COPY below so they are not expanded to empty strings.
+ARG VERSION
+ARG GIT_COMMIT="unknown"
+ENV NVIDIA_DISABLE_REQUIRE="true" \
+    NVIDIA_VISIBLE_DEVICES=all \
+    NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+# Metadata labels
+LABEL version="$VERSION" \
+    maintainer="opensource@4paradigm.com" \
+    io.k8s.display-name="HAMi vGPU Plugin" \
+    vendor="4paradigm" \
+    com.nvidia.git-commit=${GIT_COMMIT}
+
+# File system organization
+COPY ./LICENSE /k8s-vgpu/LICENSE
+COPY --from=build /artifacts/ /k8s-vgpu/bin/
+COPY --from=build /go/bin/nvidia-mig-parted /k8s-vgpu/bin/
+COPY --from=nvbuild /libvgpu/build/libvgpu.so /k8s-vgpu/lib/nvidia/libvgpu.so."$VERSION"
+COPY ./docker/*.sh /k8s-vgpu/bin/
+COPY ./lib /k8s-vgpu/lib
+
+# Runtime configuration
+ENV PATH="/k8s-vgpu/bin:${PATH}"
+ENTRYPOINT ["entrypoint.sh"]
diff --git a/go.mod b/go.mod
index dacc9cb0c..a0edd200e 100644
--- a/go.mod
+++ b/go.mod
@@ -3,59 +3,68 @@ module github.com/Project-HAMi/HAMi
 go 1.22.2
 
 require (
-	github.com/NVIDIA/go-gpuallocator v0.3.2
-	github.com/NVIDIA/go-nvlib v0.2.0
-	github.com/NVIDIA/go-nvml v0.12.0-3
-	github.com/NVIDIA/k8s-device-plugin v0.15.0
-	github.com/NVIDIA/nvidia-container-toolkit v1.15.0
-	github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a
+	github.com/NVIDIA/go-gpuallocator v0.5.0
+	github.com/NVIDIA/go-nvlib v0.7.1
+	github.com/NVIDIA/go-nvml v0.12.4-1
+	github.com/NVIDIA/nvidia-container-toolkit v1.17.2
 	github.com/fsnotify/fsnotify v1.7.0
 	github.com/google/uuid v1.6.0
 	github.com/julienschmidt/httprouter v1.3.0
-	github.com/onsi/ginkgo/v2 v2.17.1
-	github.com/onsi/gomega v1.32.0
+	github.com/onsi/ginkgo/v2 v2.17.2
+	github.com/onsi/gomega v1.33.1
 	github.com/opencontainers/runtime-spec v1.2.0
+	github.com/opencontainers/selinux v1.11.0
 	github.com/prometheus/client_golang v1.18.0
 	github.com/sirupsen/logrus v1.9.3
 	github.com/spf13/cobra v1.8.1
-	github.com/stretchr/testify v1.9.0
-	github.com/urfave/cli/v2 v2.27.1
-	golang.org/x/net v0.35.0
+	github.com/stretchr/testify v1.10.0
+	github.com/urfave/cli/v2 v2.27.5
 	golang.org/x/term v0.29.0
 	golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d
-	google.golang.org/grpc v1.63.2
-	google.golang.org/protobuf v1.33.0
+	google.golang.org/grpc v1.65.0
+	google.golang.org/protobuf v1.34.2
 	gopkg.in/yaml.v2 v2.4.0
 	gotest.tools/v3 v3.5.1
-	k8s.io/api v0.29.3
-	k8s.io/apimachinery v0.29.3
-	k8s.io/client-go v0.29.3
-	k8s.io/klog/v2 v2.120.1
+	k8s.io/api v0.31.1
+	k8s.io/apimachinery v0.31.1
+	k8s.io/client-go v0.31.1
+	k8s.io/klog/v2 v2.130.1
 	k8s.io/kube-scheduler v0.28.3
-	k8s.io/kubelet v0.29.3
+	k8s.io/kubelet v0.31.1
+	k8s.io/mount-utils v0.31.1
 	sigs.k8s.io/controller-runtime v0.16.3
-	tags.cncf.io/container-device-interface v0.7.1
+	sigs.k8s.io/node-feature-discovery v0.15.4
+	tags.cncf.io/container-device-interface v0.8.0
+	tags.cncf.io/container-device-interface/specs-go v0.8.0
+)
+
+require (
+	github.com/coreos/go-systemd/v22 v22.5.0 // indirect
+	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
+	github.com/godbus/dbus/v5 v5.1.0 // indirect
+	github.com/moby/sys/mountinfo v0.7.1 // indirect
+	golang.org/x/net v0.35.0 // indirect
 )
 
 require (
 	github.com/beorn7/perks v1.0.1 // indirect
-	github.com/cespare/xxhash/v2 v2.2.0 // indirect
-	github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
-	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
+	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
 	github.com/emicklei/go-restful/v3 v3.11.3 // indirect
 	github.com/evanphx/json-patch v5.9.0+incompatible // indirect
-	github.com/go-logr/logr v1.4.1 // indirect
+	github.com/go-logr/logr v1.4.2 // indirect
 	github.com/go-openapi/jsonpointer v0.20.2 // indirect
 	github.com/go-openapi/jsonreference v0.20.4 // indirect
 	github.com/go-openapi/swag v0.22.9 // indirect
-	github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
 	github.com/golang/protobuf v1.5.4 // indirect
 	github.com/google/gnostic-models v0.6.8 // indirect
 	github.com/google/go-cmp v0.6.0 // indirect
 	github.com/google/gofuzz v1.2.0 // indirect
-	github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect
+	github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect
+	github.com/google/renameio v1.0.1
 	github.com/imdario/mergo v0.3.16 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
@@ -67,35 +76,33 @@ require (
 	github.com/opencontainers/runc v1.1.14 // indirect
 	github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
-	github.com/pmezard/go-difflib v1.0.0 // indirect
+	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
 	github.com/prometheus/client_model v0.6.0 // indirect
 	github.com/prometheus/common v0.48.0 // indirect
-	github.com/prometheus/procfs v0.13.0 // indirect
+	github.com/prometheus/procfs v0.15.1 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
-	github.com/spf13/pflag v1.0.5 // indirect
+	github.com/spf13/pflag v1.0.5
 	github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
-	github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
-	golang.org/x/mod v0.17.0 // indirect
-	golang.org/x/oauth2 v0.17.0 // indirect
+	github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
+	golang.org/x/mod v0.20.0
+	golang.org/x/oauth2 v0.21.0 // indirect
 	golang.org/x/sys v0.30.0 // indirect
 	golang.org/x/text v0.22.0 // indirect
 	golang.org/x/time v0.5.0 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
-	google.golang.org/appengine v1.6.8 // indirect
-	google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
-	k8s.io/kube-openapi v0.0.0-20240227032403-f107216b40e2 // indirect
-	k8s.io/utils v0.0.0-20240102154912-e7106e64919e // indirect
+	k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
+	k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect
 	sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
 	sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
-	sigs.k8s.io/yaml v1.4.0 // indirect
-	tags.cncf.io/container-device-interface/specs-go v0.7.0 // indirect
+	sigs.k8s.io/yaml v1.4.0
 )
 
 replace (
 	github.com/Project-HAMi/HAMi/pkg/api => ./pkg/api
-	github.com/Project-HAMi/HAMi/pkg/device-plugin => ./pkg/device-plugin
+	// github.com/Project-HAMi/HAMi/pkg/device-plugin => ./pkg/device-plugin
 	github.com/Project-HAMi/HAMi/test/utils => ./test/utils
 	k8s.io/api => k8s.io/api v0.28.3
 	k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.28.3
@@ -119,4 +126,5 @@ replace (
 	k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.28.3
 	k8s.io/metrics => k8s.io/metrics v0.28.3
 	k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.28.3
+// github.com/Project-HAMi/HAMi/pkg/device-plugin => ./pkg/nvidia-plugin
 )
diff --git a/go.sum b/go.sum
index de9e20bbc..f9092b7c3 100644
--- a/go.sum
+++ b/go.sum
@@ -1,37 +1,34 @@
-github.com/NVIDIA/go-gpuallocator v0.3.2 h1:gXaGgFKrtsBOvbZTZIWQ81yr7voHm5keRCXb3VNjMMU=
-github.com/NVIDIA/go-gpuallocator v0.3.2/go.mod h1:OuqBvWRrs9+A783a753fK9YYP8P1BTf+T4Map+XfTUs=
-github.com/NVIDIA/go-nvlib v0.2.0 h1:roq+SDstbP1fcy2XVH7wB2Gz2/Ud7Q+NGQYOcVITVrA=
-github.com/NVIDIA/go-nvlib v0.2.0/go.mod h1:kFuLNTyD1tF6FbRFlk+/EdUW5BrkE+v1Y3A3/9zKSjA=
-github.com/NVIDIA/go-nvml v0.12.0-3 h1:QwfjYxEqIQVRhl8327g2Y3ZvKResPydpGSKtCIIK9jE=
-github.com/NVIDIA/go-nvml v0.12.0-3/go.mod h1:SOufGc5Wql+cxrIZ8RyJwVKDYxfbs4WPkHXqadcbfvA=
-github.com/NVIDIA/k8s-device-plugin v0.15.0 h1:QKfAo6Xpl5M4Y9hltlYrzHjwGR+vfeAuiiNNyFN4DoE=
-github.com/NVIDIA/k8s-device-plugin v0.15.0/go.mod h1:s6DHR9QG5+xAbWG7NniWTnrZI7wUojl1/hxeZClXm/U=
-github.com/NVIDIA/nvidia-container-toolkit v1.15.0 h1:YmYZUKJzhz/lJSVH6k1mk5IUCHpt8HwRtwMrtBoCzhQ=
-github.com/NVIDIA/nvidia-container-toolkit v1.15.0/go.mod h1:SUwxfwi+dl1LtVlpAnJEolxuZfCtAVmOKRGWhJYsiJI=
+github.com/NVIDIA/go-gpuallocator v0.5.0 h1:166ICvPv2dU9oZ2J3kJ4y3XdbGCi6LhXgFZJtrqeu3A=
+github.com/NVIDIA/go-gpuallocator v0.5.0/go.mod h1:zos5bTIN01hpQioOyu9oRKglrznImMQvm0bZllMmckw=
+github.com/NVIDIA/go-nvlib v0.7.1 h1:7HHPZxoCjSLm1NgaRRjuhI8ffMCpc5Vgpg5yxQYUff8=
+github.com/NVIDIA/go-nvlib v0.7.1/go.mod h1:2Kh2kYSP5IJ8EKf0/SYDzHiQKb9EJkwOf2LQzu6pXzY=
+github.com/NVIDIA/go-nvml v0.12.4-1 h1:WKUvqshhWSNTfm47ETRhv0A0zJyr1ncCuHiXwoTrBEc=
+github.com/NVIDIA/go-nvml v0.12.4-1/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
+github.com/NVIDIA/nvidia-container-toolkit v1.17.2 h1:iE6PK9SQH3HyDrOolu27xn3CJgURR3bDtnbfFrxdML8=
+github.com/NVIDIA/nvidia-container-toolkit v1.17.2/go.mod h1:R6bNf6ca0IjjACa0ncKGvsrx6zSjsgz8QkFyBDk5szU=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
 github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
 github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
-github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
-github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
-github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
-github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
-github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a h1:sP3PcgyIkRlHqfF3Jfpe/7G8kf/qpzG4C8r94y9hLbE=
-github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a/go.mod h1:xMRa4fJgXzSDFUCURSimOUgoSc+odohvO3uXT9xjqH0=
-github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
+github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
 github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc=
+github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/emicklei/go-restful/v3 v3.11.3 h1:yagOQz/38xJmcNeZJtrUcKjkHRltIaIFXKWeG1SkWGE=
 github.com/emicklei/go-restful/v3 v3.11.3/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
 github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls=
 github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
 github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
 github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
-github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ=
-github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
+github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
 github.com/go-logr/zapr v1.2.4 h1:QHVo+6stLbfJmYGkQ7uGHUCu5hnAFAj6mDe6Ea0SeOo=
 github.com/go-logr/zapr v1.2.4/go.mod h1:FyHWQIzQORZ0QVE1BtVHv3cKtNLuXsbNLtpuhNapBOA=
 github.com/go-openapi/jsonpointer v0.20.2 h1:mQc3nmndL8ZBzStEo3JYF8wzmeWffDH4VbXz58sAx6Q=
@@ -40,27 +37,29 @@ github.com/go-openapi/jsonreference v0.20.4 h1:bKlDxQxQJgwpUSgOENiMPzCTBVuc7vTdX
 github.com/go-openapi/jsonreference v0.20.4/go.mod h1:5pZJyJP2MnYCpoeoMAql78cCHauHj0V9Lhc506VOpw4=
 github.com/go-openapi/swag v0.22.9 h1:XX2DssF+mQKM2DHsbgZK74y/zj4mo9I99+89xUmuZCE=
 github.com/go-openapi/swag v0.22.9/go.mod h1:3/OXnFfnMAwBD099SwYRk7GD3xOrr1iL7d/XNLXVVwE=
-github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
-github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
+github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
+github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
+github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
+github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk=
+github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
 github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
-github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
-github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
 github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
 github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I=
 github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U=
-github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
 github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
-github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec=
-github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
+github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg=
+github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw=
+github.com/google/renameio v1.0.1 h1:Lh/jXZmvZxb0BBeSY5VKEfidcbcbenKjZFzM/q0fSeU=
+github.com/google/renameio v1.0.1/go.mod h1:t/HQoYBZSsWSNK35C6CO/TpPLDVWvxOHboWUAweKUpk=
 github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
@@ -69,7 +68,6 @@ github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY
 github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
 github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
 github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
-github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
 github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
 github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
@@ -89,6 +87,8 @@ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
 github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs=
+github.com/moby/sys/mountinfo v0.7.1 h1:/tTvQaSJRr2FshkhXiIpux6fQ2Zvc4j7tAhMTStAG2g=
+github.com/moby/sys/mountinfo v0.7.1/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -97,10 +97,10 @@ github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjY
 github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
-github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8=
-github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs=
-github.com/onsi/gomega v1.32.0 h1:JRYU78fJ1LPxlckP6Txi/EYqJvjtMrDC04/MM5XRHPk=
-github.com/onsi/gomega v1.32.0/go.mod h1:a4x4gW6Pz2yK1MAmvluYme5lvYTn61afQ2ETw/8n4Lg=
+github.com/onsi/ginkgo/v2 v2.17.2 h1:7eMhcy3GimbsA3hEnVKdw/PQM9XN9krpKVXsZdph0/g=
+github.com/onsi/ginkgo/v2 v2.17.2/go.mod h1:nP2DPOQoNsQmsVyv5rDA8JkXQoCs6goXIvr/PRJ1eCc=
+github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk=
+github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0=
 github.com/opencontainers/runc v1.1.14 h1:rgSuzbmgz5DUJjeSnw337TxDbRuqjs6iqQck/2weR6w=
 github.com/opencontainers/runc v1.1.14/go.mod h1:E4C2z+7BxR7GHXp0hAY53mek+x49X1LjPNeMTfRGvOA=
 github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
@@ -113,18 +113,19 @@ github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaL
 github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk=
 github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA=
 github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos=
 github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8=
 github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSzKKE=
 github.com/prometheus/common v0.48.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc=
-github.com/prometheus/procfs v0.13.0 h1:GqzLlQyfsPbaEHaQkO7tbDlriv/4o5Hudv6OXHGKX7o=
-github.com/prometheus/procfs v0.13.0/go.mod h1:cd4PFCR54QLnGKPaKGA6l+cfuNXtht43ZKY6tow0Y1g=
-github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
-github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
+github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
+github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
+github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
+github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
 github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
@@ -137,15 +138,14 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
-github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
-github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
 github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
 github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
-github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho=
-github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
+github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w=
+github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ=
 github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
 github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
 github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
@@ -153,11 +153,10 @@ github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHo
 github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
 github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
 github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
-github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
-github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
+github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
+github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
-github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
 go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
 go.uber.org/zap v1.25.0 h1:4Hvk6GtkucQ790dqmj7l1eEnRdKm3k3ZUrUMS2d5+5c=
@@ -165,67 +164,38 @@ go.uber.org/zap v1.25.0/go.mod h1:JIAUzQIH94IC4fOJQm7gMmBJP5k7wQfdcnYdPoEXJYk=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/exp v0.0.0-20231206192017-f3f8817b8deb h1:c0vyKkb6yr3KR7jEfJaOSv4lG7xPkbN6r52aJz1d8a8=
+golang.org/x/exp v0.0.0-20231206192017-f3f8817b8deb/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI=
 golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
-golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
-golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
+golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
-golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
-golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
-golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
-golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
-golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
 golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
 golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
-golang.org/x/oauth2 v0.17.0 h1:6m3ZPmLEFdVxKKWnKq4VqZ60gutO35zm+zrAHVmHyDQ=
-golang.org/x/oauth2 v0.17.0/go.mod h1:OzPDGQiuQMguemayvdylqddI7qcD9lnSDb+1FiwQ5HA=
+golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs=
+golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
-golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
-golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
 golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
+golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
-golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
-golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
 golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
-golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
-golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA=
-golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0=
-golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q=
-golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
 golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU=
 golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
-golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
-golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
-golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
-golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
-golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
 golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
 golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
 golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
@@ -234,7 +204,6 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
 golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
 golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -243,16 +212,12 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw=
 gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
-google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM=
-google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1:cZGRis4/ot9uVm639a+rHCUaG0JJHEsdyzSQTMX+suY=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY=
-google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM=
-google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA=
-google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
-google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
-google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 h1:BwIjyKYGsK9dMCBOorzRri8MQwmi7mT9rGHsCEinZkA=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY=
+google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc=
+google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ=
+google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
+google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
@@ -272,25 +237,29 @@ k8s.io/apimachinery v0.28.3 h1:B1wYx8txOaCQG0HmYF6nbpU8dg6HvA06x5tEffvOe7A=
 k8s.io/apimachinery v0.28.3/go.mod h1:uQTKmIqs+rAYaq+DFaoD2X7pcjLOqbQX2AOiO0nIpb8=
 k8s.io/client-go v0.28.3 h1:2OqNb72ZuTZPKCl+4gTKvqao0AMOl9f3o2ijbAj3LI4=
 k8s.io/client-go v0.28.3/go.mod h1:LTykbBp9gsA7SwqirlCXBWtK0guzfhpoW4qSm7i9dxo=
-k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw=
-k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
-k8s.io/kube-openapi v0.0.0-20240227032403-f107216b40e2 h1:02WBxjyRwX4rJdl3XlWVjFbXT/kAKCsipoM8hQY3Dwo=
-k8s.io/kube-openapi v0.0.0-20240227032403-f107216b40e2/go.mod h1:B7Huvd1LKZtTYmY+nC6rnmN8lyGYT9lifBcPD5epL6k=
+k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
+k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
+k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag=
+k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98=
 k8s.io/kube-scheduler v0.28.3 h1:sCvDOzRSDGCZ4whVykNoh/HbAZbwBMhbJ9xFab4QUCI=
 k8s.io/kube-scheduler v0.28.3/go.mod h1:bZ0V8rlDE2eoLl2At4mSdGBKe9k6cA9P0+AuJ6aG+Os=
 k8s.io/kubelet v0.28.3 h1:bp/uIf1R5F61BlFvFtzc4PDEiK7TtFcw3wFJlc0V0LM=
 k8s.io/kubelet v0.28.3/go.mod h1:E3NHYbp/v45Ao6AD0EOZnqO3L0R6Haks6Nm0+bnFwtU=
-k8s.io/utils v0.0.0-20240102154912-e7106e64919e h1:eQ/4ljkx21sObifjzXwlPKpdGLrCfRziVtos3ofG/sQ=
-k8s.io/utils v0.0.0-20240102154912-e7106e64919e/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+k8s.io/mount-utils v0.31.1 h1:f8UrH9kRynljmdNGM6BaCvFUON5ZPKDgE+ltmYqI4wA=
+k8s.io/mount-utils v0.31.1/go.mod h1:HV/VYBUGqYUj4vt82YltzpWvgv8FPg0G9ItyInT3NPU=
+k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A=
+k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
 sigs.k8s.io/controller-runtime v0.16.3 h1:2TuvuokmfXvDUamSx1SuAOO3eTyye+47mJCigwG62c4=
 sigs.k8s.io/controller-runtime v0.16.3/go.mod h1:j7bialYoSn142nv9sCOJmQgDXQXxnroFU4VnX/brVJ0=
 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
+sigs.k8s.io/node-feature-discovery v0.15.4 h1:IoSN/G+Bl94Liu+b862a3gx/rqCKdeUtcPxbL4VnOYg=
+sigs.k8s.io/node-feature-discovery v0.15.4/go.mod h1:vp165AxVdzCWYIKuaLkckGo53/D5OR+WSyePSUEIYQw=
 sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
 sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
 sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
 sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
-tags.cncf.io/container-device-interface v0.7.1 h1:MATNCbAD1su9U6zwQe5BrQ2vGGp1GBayD70bYaxYCNE=
-tags.cncf.io/container-device-interface v0.7.1/go.mod h1:h1JVuOqTQVORp8DziaWKUCDNzAmN+zeCbqbqD30D0ZQ=
-tags.cncf.io/container-device-interface/specs-go v0.7.0 h1:w/maMGVeLP6TIQJVYT5pbqTi8SCw/iHZ+n4ignuGHqg=
-tags.cncf.io/container-device-interface/specs-go v0.7.0/go.mod h1:hMAwAbMZyBLdmYqWgYcKH0F/yctNpV3P35f+/088A80=
+tags.cncf.io/container-device-interface v0.8.0 h1:8bCFo/g9WODjWx3m6EYl3GfUG31eKJbaggyBDxEldRc=
+tags.cncf.io/container-device-interface v0.8.0/go.mod h1:Apb7N4VdILW0EVdEMRYXIDVRZfNJZ+kmEUss2kRRQ6Y=
+tags.cncf.io/container-device-interface/specs-go v0.8.0 h1:QYGFzGxvYK/ZLMrjhvY0RjpUavIn4KcmRmVP/JjdBTA=
+tags.cncf.io/container-device-interface/specs-go v0.8.0/go.mod h1:BhJIkjjPh4qpys+qm4DAYtUyryaTDg9zris+AczXyws=
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/cdi/api.go b/pkg/device-plugin/nvidiadevice/nvinternal/cdi/api.go
deleted file mode 100644
index 93c818b0e..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/cdi/api.go
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package cdi
-
-// Interface provides the API to the 'cdi' package
-//
-//go:generate moq -stub -out api_mock.go . Interface
-type Interface interface {
-	CreateSpecFile() error
-	QualifiedName(string, string) string
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/cdi/cdi.go b/pkg/device-plugin/nvidiadevice/nvinternal/cdi/cdi.go
deleted file mode 100644
index 45103b4a6..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/cdi/cdi.go
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package cdi
-
-import (
-	"fmt"
-	"path/filepath"
-
-	nvdevice "github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
-	"github.com/NVIDIA/go-nvlib/pkg/nvml"
-	"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
-	roottransform "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
-	"github.com/sirupsen/logrus"
-	cdiapi "tags.cncf.io/container-device-interface/pkg/cdi"
-)
-
-const (
-	cdiRoot = "/var/run/cdi"
-)
-
-// cdiHandler creates CDI specs for devices assocatied with the device plugin.
-type cdiHandler struct {
-	logger           *logrus.Logger
-	nvml             nvml.Interface
-	nvdevice         nvdevice.Interface
-	driverRoot       string
-	targetDriverRoot string
-	nvidiaCTKPath    string
-	cdiRoot          string
-	vendor           string
-	deviceIDStrategy string
-
-	enabled      bool
-	gdsEnabled   bool
-	mofedEnabled bool
-
-	cdilibs map[string]nvcdi.Interface
-}
-
-var _ Interface = &cdiHandler{}
-
-// newHandler constructs a new instance of the 'cdi' interface.
-func newHandler(opts ...Option) (Interface, error) {
-	c := &cdiHandler{}
-	for _, opt := range opts {
-		opt(c)
-	}
-
-	if !c.enabled {
-		return &null{}, nil
-	}
-
-	if c.logger == nil {
-		c.logger = logrus.StandardLogger()
-	}
-	if c.nvml == nil {
-		c.nvml = nvml.New()
-	}
-	if c.nvdevice == nil {
-		c.nvdevice = nvdevice.New(nvdevice.WithNvml(c.nvml))
-	}
-	if c.deviceIDStrategy == "" {
-		c.deviceIDStrategy = "uuid"
-	}
-	if c.driverRoot == "" {
-		c.driverRoot = "/"
-	}
-	if c.targetDriverRoot == "" {
-		c.targetDriverRoot = c.driverRoot
-	}
-
-	deviceNamer, err := nvcdi.NewDeviceNamer(c.deviceIDStrategy)
-	if err != nil {
-		return nil, err
-	}
-
-	c.cdilibs = make(map[string]nvcdi.Interface)
-
-	c.cdilibs["gpu"], err = nvcdi.New(
-		nvcdi.WithLogger(c.logger),
-		nvcdi.WithNvmlLib(c.nvml),
-		nvcdi.WithDeviceLib(c.nvdevice),
-		nvcdi.WithNVIDIACTKPath(c.nvidiaCTKPath),
-		nvcdi.WithDriverRoot(c.driverRoot),
-		nvcdi.WithDeviceNamers(deviceNamer),
-		nvcdi.WithVendor(c.vendor),
-		nvcdi.WithClass("gpu"),
-	)
-	if err != nil {
-		return nil, fmt.Errorf("failed to create nvcdi library: %v", err)
-	}
-
-	var additionalModes []string
-	if c.gdsEnabled {
-		additionalModes = append(additionalModes, "gds")
-	}
-	if c.mofedEnabled {
-		additionalModes = append(additionalModes, "mofed")
-	}
-
-	for _, mode := range additionalModes {
-		lib, err := nvcdi.New(
-			nvcdi.WithLogger(c.logger),
-			nvcdi.WithNVIDIACTKPath(c.nvidiaCTKPath),
-			nvcdi.WithDriverRoot(c.driverRoot),
-			nvcdi.WithVendor(c.vendor),
-			nvcdi.WithMode(mode),
-		)
-		if err != nil {
-			return nil, fmt.Errorf("failed to create nvcdi library: %v", err)
-		}
-		c.cdilibs[mode] = lib
-	}
-
-	return c, nil
-}
-
-// CreateSpecFile creates a CDI spec file for the specified devices.
-func (cdi *cdiHandler) CreateSpecFile() error {
-	for class, cdilib := range cdi.cdilibs {
-		cdi.logger.Infof("Generating CDI spec for resource: %s/%s", cdi.vendor, class)
-
-		if class == "gpu" {
-			ret := cdi.nvml.Init()
-			if ret != nvml.SUCCESS {
-				return fmt.Errorf("failed to initialize NVML: %v", ret)
-			}
-			defer cdi.nvml.Shutdown()
-		}
-
-		spec, err := cdilib.GetSpec()
-		if err != nil {
-			return fmt.Errorf("failed to get CDI spec: %v", err)
-		}
-
-		err = roottransform.New(
-			roottransform.WithRoot(cdi.driverRoot),
-			roottransform.WithTargetRoot(cdi.targetDriverRoot),
-		).Transform(spec.Raw())
-		if err != nil {
-			return fmt.Errorf("failed to transform driver root in CDI spec: %v", err)
-		}
-
-		raw := spec.Raw()
-		specName, err := cdiapi.GenerateNameForSpec(raw)
-		if err != nil {
-			return fmt.Errorf("failed to generate spec name: %v", err)
-		}
-
-		err = spec.Save(filepath.Join(cdiRoot, specName+".json"))
-		if err != nil {
-			return fmt.Errorf("failed to save CDI spec: %v", err)
-		}
-	}
-
-	return nil
-}
-
-// QualifiedName constructs a CDI qualified device name for the specified resources.
-// Note: This assumes that the specified id matches the device name returned by the naming strategy.
-func (cdi *cdiHandler) QualifiedName(class string, id string) string {
-	return cdiapi.QualifiedName(cdi.vendor, class, id)
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/cdi/factory.go b/pkg/device-plugin/nvidiadevice/nvinternal/cdi/factory.go
deleted file mode 100644
index 01173c62a..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/cdi/factory.go
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package cdi
-
-import (
-	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
-
-	"k8s.io/klog/v2"
-)
-
-// New is a factory method that creates a CDI handler for creating CDI specs.
-func New(opts ...Option) (Interface, error) {
-	infolib := info.New()
-
-	hasNVML, _ := infolib.HasNvml()
-	if !hasNVML {
-		klog.Warning("No valid resources detected, creating a null CDI handler")
-		return NewNullHandler(), nil
-	}
-
-	return newHandler(opts...)
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/cdi/null.go b/pkg/device-plugin/nvidiadevice/nvinternal/cdi/null.go
deleted file mode 100644
index e5a46c73c..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/cdi/null.go
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package cdi
-
-import (
-	"k8s.io/klog/v2"
-)
-
-type null struct{}
-
-var _ Interface = &null{}
-
-// NewNullHandler returns an instance of the 'cdi' interface that can
-// be used when CDI specs are not required.
-func NewNullHandler() Interface {
-	return &null{}
-}
-
-// CreateSpecFile is a no-op for the null handler.
-func (n *null) CreateSpecFile() error {
-	return nil
-}
-
-// QualifiedName is a no-op for the null handler. A error message is logged
-// inidicating this should never be called for the null handler.
-func (n *null) QualifiedName(class string, id string) string {
-	klog.Error("cannot return a qualified CDI device name with the null CDI handler")
-	return ""
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/cdi/options.go b/pkg/device-plugin/nvidiadevice/nvinternal/cdi/options.go
deleted file mode 100644
index 77bb69f4a..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/cdi/options.go
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package cdi
-
-import (
-	"github.com/NVIDIA/go-nvlib/pkg/nvml"
-)
-
-// Option defines a function for passing options to the New() call
-type Option func(*cdiHandler)
-
-// WithEnabled provides an Option to set the enabled flag used by the 'cdi' interface
-func WithEnabled(enabled bool) Option {
-	return func(c *cdiHandler) {
-		c.enabled = enabled
-	}
-}
-
-// WithDriverRoot provides an Option to set the driver root used by the 'cdi' interface
-func WithDriverRoot(root string) Option {
-	return func(c *cdiHandler) {
-		c.driverRoot = root
-	}
-}
-
-// WithTargetDriverRoot provides an Option to set the target driver root used by the 'cdi' interface
-func WithTargetDriverRoot(root string) Option {
-	return func(c *cdiHandler) {
-		c.targetDriverRoot = root
-	}
-}
-
-// WithNvidiaCTKPath provides an Option to set the nvidia-ctk path used by the 'cdi' interface
-func WithNvidiaCTKPath(path string) Option {
-	return func(c *cdiHandler) {
-		c.nvidiaCTKPath = path
-	}
-}
-
-// WithNvml provides an Option to set the NVML library used by the 'cdi' interface
-func WithNvml(nvml nvml.Interface) Option {
-	return func(c *cdiHandler) {
-		c.nvml = nvml
-	}
-}
-
-// WithDeviceIDStrategy provides an Option to set the device ID strategy used by the 'cdi' interface
-func WithDeviceIDStrategy(strategy string) Option {
-	return func(c *cdiHandler) {
-		c.deviceIDStrategy = strategy
-	}
-}
-
-// WithVendor provides an Option to set the vendor used by the 'cdi' interface
-func WithVendor(vendor string) Option {
-	return func(c *cdiHandler) {
-		c.vendor = vendor
-	}
-}
-
-// WithGdsEnabled provides and option to set whether a GDS CDI spec should be generated
-func WithGdsEnabled(enabled bool) Option {
-	return func(c *cdiHandler) {
-		c.gdsEnabled = enabled
-	}
-}
-
-// WithMofedEnabled provides and option to set whether a MOFED CDI spec should be generated
-func WithMofedEnabled(enabled bool) Option {
-	return func(c *cdiHandler) {
-		c.mofedEnabled = enabled
-	}
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/info/version.go b/pkg/device-plugin/nvidiadevice/nvinternal/info/version.go
deleted file mode 100644
index 503b5ded8..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/info/version.go
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package info
-
-import "strings"
-
-// version must be set by go build's -X main.version= option in the Makefile.
-var version = "unknown"
-
-// gitCommit will be the hash that the binary was built from
-// and will be populated by the Makefile.
-var gitCommit = ""
-
-// GetVersionParts returns the different version components.
-func GetVersionParts() []string {
-	v := []string{version}
-
-	if gitCommit != "" {
-		v = append(v, "commit: "+gitCommit)
-	}
-
-	return v
-}
-
-// GetVersionString returns the string representation of the version.
-func GetVersionString(more ...string) string {
-	v := append(GetVersionParts(), more...)
-	return strings.Join(v, "\n")
-}
-
-// GetVersion returns the version of the binary.
-func GetVersion() string {
-	return version
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/api.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/api.go
deleted file mode 100644
index 1066c035a..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/api.go
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package plugin
-
-import "github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/rm"
-
-// Interface defines the API for the plugin package
-type Interface interface {
-	Devices() rm.Devices
-	Start() error
-	Stop() error
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/api.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/api.go
deleted file mode 100644
index 59f7e9c71..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/api.go
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package manager
-
-import "github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/plugin"
-
-// Interface defines the API for the plugin manager package
-type Interface interface {
-	GetPlugins() ([]plugin.Interface, error)
-	CreateCDISpecFile() error
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/factory.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/factory.go
deleted file mode 100644
index 5286fe699..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/factory.go
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package manager
-
-import (
-	"fmt"
-
-	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
-	"github.com/NVIDIA/go-nvlib/pkg/nvml"
-	"k8s.io/klog/v2"
-
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/cdi"
-	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
-)
-
-type manager struct {
-	migStrategy     string
-	failOnInitError bool
-	nvmllib         nvml.Interface
-
-	cdiHandler cdi.Interface
-	cdiEnabled bool
-	config     *nvidia.DeviceConfig
-	infolib    info.Interface
-}
-
-// New creates a new plugin manager with the supplied options.
-func New(opts ...Option) (Interface, error) {
-	m := &manager{}
-	for _, opt := range opts {
-		opt(m)
-	}
-
-	if m.config == nil {
-		klog.Warning("no config provided, returning a null manager")
-		return &null{}, nil
-	}
-
-	if m.infolib == nil {
-		m.infolib = info.New()
-	}
-	if m.cdiHandler == nil {
-		m.cdiHandler = cdi.NewNullHandler()
-	}
-
-	mode, err := m.resolveMode()
-	if err != nil {
-		return nil, err
-	}
-
-	if mode != "nvml" && m.cdiEnabled {
-		klog.Warning("CDI is not supported; disabling CDI.")
-		m.cdiEnabled = false
-	}
-
-	switch mode {
-	case "nvml":
-		if m.nvmllib == nil {
-			m.nvmllib = nvml.New()
-		}
-		ret := m.nvmllib.Init()
-		if ret != nvml.SUCCESS {
-			klog.Errorf("Failed to initialize NVML: %v.", ret)
-			klog.Errorf("If this is a GPU node, did you set the docker default runtime to `nvidia`?")
-			klog.Errorf("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites")
-			klog.Errorf("You can learn how to set the runtime at: https://github.com/NVIDIA/k8s-device-plugin#quick-start")
-			klog.Errorf("If this is not a GPU node, you should set up a toleration or nodeSelector to only deploy this plugin on GPU nodes")
-			if m.failOnInitError {
-				return nil, fmt.Errorf("nvml init failed: %v", ret)
-			}
-			klog.Warningf("nvml init failed: %v", ret)
-			return &null{}, nil
-		}
-		defer m.nvmllib.Shutdown()
-
-		return (*nvmlmanager)(m), nil
-	case "tegra":
-		return (*tegramanager)(m), nil
-	case "null":
-		return &null{}, nil
-	}
-
-	return nil, fmt.Errorf("unknown mode: %v", mode)
-}
-
-func (m *manager) resolveMode() (string, error) {
-	// logWithReason logs the output of the has* / is* checks from the info.Interface
-	logWithReason := func(f func() (bool, string), tag string) bool {
-		is, reason := f()
-		if !is {
-			tag = "non-" + tag
-		}
-		klog.Infof("Detected %v platform: %v", tag, reason)
-		return is
-	}
-
-	hasNVML := logWithReason(m.infolib.HasNvml, "NVML")
-	isTegra := logWithReason(m.infolib.IsTegraSystem, "Tegra")
-
-	if !hasNVML && !isTegra {
-		klog.Error("Incompatible platform detected")
-		klog.Error("If this is a GPU node, did you configure the NVIDIA Container Toolkit?")
-		klog.Error("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites")
-		klog.Error("You can learn how to set the runtime at: https://github.com/NVIDIA/k8s-device-plugin#quick-start")
-		klog.Error("If this is not a GPU node, you should set up a toleration or nodeSelector to only deploy this plugin on GPU nodes")
-		if m.failOnInitError {
-			return "", fmt.Errorf("platform detection failed")
-		}
-		return "null", nil
-	}
-
-	// The NVIDIA container stack does not yet support the use of integrated AND discrete GPUs on the same node.
-	if isTegra {
-		if hasNVML {
-			klog.Warning("Disabling Tegra-based resources on NVML system")
-			return "nvml", nil
-		}
-		return "tegra", nil
-	}
-
-	return "nvml", nil
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/null.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/null.go
deleted file mode 100644
index 8cfcae63a..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/null.go
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package manager
-
-import (
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/plugin"
-)
-
-type null struct{}
-
-// GetPlugins returns an empty set of Plugins for the null manager
-func (m *null) GetPlugins() ([]plugin.Interface, error) {
-	return nil, nil
-}
-
-// CreateCDISpecFile creates the spec is a no-op for the null plugin
-func (m *null) CreateCDISpecFile() error {
-	return nil
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/nvml.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/nvml.go
deleted file mode 100644
index 35abe3abe..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/nvml.go
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package manager
-
-import (
-	"fmt"
-
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/plugin"
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/rm"
-)
-
-type nvmlmanager manager
-
-// GetPlugins returns the plugins associated with the NVML resources available on the node
-func (m *nvmlmanager) GetPlugins() ([]plugin.Interface, error) {
-	rms, err := rm.NewNVMLResourceManagers(m.nvmllib, m.config)
-	if err != nil {
-		return nil, fmt.Errorf("failed to construct NVML resource managers: %v", err)
-	}
-
-	var plugins []plugin.Interface
-	for _, r := range rms {
-		plugins = append(plugins, plugin.NewNvidiaDevicePlugin(m.config, r, m.cdiHandler, m.cdiEnabled))
-	}
-	return plugins, nil
-}
-
-// CreateCDISpecFile creates forwards the request to the CDI handler
-func (m *nvmlmanager) CreateCDISpecFile() error {
-	return m.cdiHandler.CreateSpecFile()
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/options.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/options.go
deleted file mode 100644
index 44e87beed..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/options.go
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package manager
-
-import (
-	"github.com/NVIDIA/go-nvlib/pkg/nvml"
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/cdi"
-	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
-)
-
-// Option is a function that configures a manager
-type Option func(*manager)
-
-// WithCDIEnabled sets whether CDI is enabled for the manager
-func WithCDIEnabled(enabled bool) Option {
-	return func(m *manager) {
-		m.cdiEnabled = enabled
-	}
-}
-
-// WithCDIHandler sets the CDI handler for the manager
-func WithCDIHandler(handler cdi.Interface) Option {
-	return func(m *manager) {
-		m.cdiHandler = handler
-	}
-}
-
-// WithNVML sets the NVML handler for the manager
-func WithNVML(nvmllib nvml.Interface) Option {
-	return func(m *manager) {
-		m.nvmllib = nvmllib
-	}
-}
-
-// WithFailOnInitError sets whether the manager should fail on initialization errors
-func WithFailOnInitError(failOnInitError bool) Option {
-	return func(m *manager) {
-		m.failOnInitError = failOnInitError
-	}
-}
-
-// WithMigStrategy sets the MIG strategy for the manager
-func WithMigStrategy(migStrategy string) Option {
-	return func(m *manager) {
-		m.migStrategy = migStrategy
-	}
-}
-
-// WithConfig sets the config reference for the manager
-func WithConfig(config *nvidia.DeviceConfig) Option {
-	return func(m *manager) {
-		m.config = config
-	}
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/tegra.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/tegra.go
deleted file mode 100644
index 8c1801e26..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/manager/tegra.go
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package manager
-
-import (
-	"fmt"
-
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/plugin"
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/rm"
-)
-
-type tegramanager manager
-
-// GetPlugins returns the plugins associated with the NVML resources available on the node
-func (m *tegramanager) GetPlugins() ([]plugin.Interface, error) {
-	rms, err := rm.NewTegraResourceManagers(m.config)
-	if err != nil {
-		return nil, fmt.Errorf("failed to construct NVML resource managers: %v", err)
-	}
-
-	var plugins []plugin.Interface
-	for _, r := range rms {
-		plugins = append(plugins, plugin.NewNvidiaDevicePlugin(m.config, r, m.cdiHandler, m.cdiEnabled))
-	}
-	return plugins, nil
-}
-
-// CreateCDISpecFile creates the spec is a no-op for the tegra plugin
-func (m *tegramanager) CreateCDISpecFile() error {
-	return nil
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go
deleted file mode 100644
index 087e7cf6c..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go
+++ /dev/null
@@ -1,671 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package plugin
-
-import (
-	"bytes"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"net"
-	"os"
-	"os/exec"
-	"path"
-	"path/filepath"
-	"strconv"
-	"strings"
-	"time"
-
-	spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
-	cdiapi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
-	"github.com/google/uuid"
-	"golang.org/x/net/context"
-	"google.golang.org/grpc"
-	"k8s.io/apimachinery/pkg/util/yaml"
-	"k8s.io/klog/v2"
-	kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
-
-	"github.com/Project-HAMi/HAMi/pkg/device"
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/cdi"
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/rm"
-	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
-	"github.com/Project-HAMi/HAMi/pkg/util"
-)
-
-// Constants for use by the 'volume-mounts' device list strategy
-const (
-	deviceListAsVolumeMountsHostPath          = "/dev/null"
-	deviceListAsVolumeMountsContainerPathRoot = "/var/run/nvidia-container-devices"
-	NodeLockNvidia                            = "hami.io/mutex.lock"
-)
-
-var (
-	hostHookPath string
-	ConfigFile   *string
-)
-
-func init() {
-	hostHookPath, _ = os.LookupEnv("HOOK_PATH")
-}
-
-// NvidiaDevicePlugin implements the Kubernetes device plugin API
-type NvidiaDevicePlugin struct {
-	rm                   rm.ResourceManager
-	config               *nvidia.DeviceConfig
-	deviceListEnvvar     string
-	deviceListStrategies spec.DeviceListStrategies
-	socket               string
-	schedulerConfig      nvidia.NvidiaConfig
-
-	cdiHandler          cdi.Interface
-	cdiEnabled          bool
-	cdiAnnotationPrefix string
-
-	operatingMode string
-	migCurrent    nvidia.MigPartedSpec
-
-	server *grpc.Server
-	health chan *rm.Device
-	stop   chan interface{}
-}
-
-func readFromConfigFile(sConfig *nvidia.NvidiaConfig) (string, error) {
-	jsonbyte, err := os.ReadFile("/config/config.json")
-	mode := "hami-core"
-	if err != nil {
-		return "", err
-	}
-	var deviceConfigs nvidia.DevicePluginConfigs
-	err = json.Unmarshal(jsonbyte, &deviceConfigs)
-	if err != nil {
-		return "", err
-	}
-	klog.Infof("Device Plugin Configs: %v", fmt.Sprintf("%v", deviceConfigs))
-	for _, val := range deviceConfigs.Nodeconfig {
-		if os.Getenv(util.NodeNameEnvName) == val.Name {
-			klog.Infof("Reading config from file %s", val.Name)
-			if val.Devicememoryscaling > 0 {
-				sConfig.DeviceMemoryScaling = val.Devicememoryscaling
-			}
-			if val.Devicecorescaling > 0 {
-				sConfig.DeviceCoreScaling = val.Devicecorescaling
-			}
-			if val.Devicesplitcount > 0 {
-				sConfig.DeviceSplitCount = val.Devicesplitcount
-			}
-			if val.FilterDevice != nil && (len(val.FilterDevice.UUID) > 0 || len(val.FilterDevice.Index) > 0) {
-				nvidia.DevicePluginFilterDevice = val.FilterDevice
-			}
-			if len(val.OperatingMode) > 0 {
-				mode = val.OperatingMode
-			}
-			klog.Infof("FilterDevice: %v", val.FilterDevice)
-		}
-	}
-	return mode, nil
-}
-
-// NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin
-func NewNvidiaDevicePlugin(config *nvidia.DeviceConfig, resourceManager rm.ResourceManager, cdiHandler cdi.Interface, cdiEnabled bool) *NvidiaDevicePlugin {
-	_, name := resourceManager.Resource().Split()
-
-	deviceListStrategies, _ := spec.NewDeviceListStrategies(*config.Flags.Plugin.DeviceListStrategy)
-
-	sConfig, err := device.LoadConfig(*ConfigFile)
-	klog.Infoln("reading config=", config, "resourceName", config.ResourceName, "configfile=", *ConfigFile, "sconfig=", sConfig)
-	if err != nil {
-		klog.Fatalf(`failed to load device config file %s: %v`, *ConfigFile, err)
-	}
-	mode, err := readFromConfigFile(&sConfig.NvidiaConfig)
-	if err != nil {
-		klog.Errorf("readFromConfigFile err:%s", err.Error())
-	}
-	// Initialize devices with configuration
-	if err := device.InitDevicesWithConfig(sConfig); err != nil {
-		klog.Fatalf("failed to initialize devices: %v", err)
-	}
-	return &NvidiaDevicePlugin{
-		rm:                   resourceManager,
-		config:               config,
-		deviceListEnvvar:     "NVIDIA_VISIBLE_DEVICES",
-		deviceListStrategies: deviceListStrategies,
-		socket:               kubeletdevicepluginv1beta1.DevicePluginPath + "nvidia-" + name + ".sock",
-		cdiHandler:           cdiHandler,
-		cdiEnabled:           cdiEnabled,
-		cdiAnnotationPrefix:  *config.Flags.Plugin.CDIAnnotationPrefix,
-		schedulerConfig:      sConfig.NvidiaConfig,
-		operatingMode:        mode,
-		migCurrent:           nvidia.MigPartedSpec{},
-
-		// These will be reinitialized every
-		// time the plugin server is restarted.
-		server: nil,
-		health: nil,
-		stop:   nil,
-	}
-}
-
-func (plugin *NvidiaDevicePlugin) initialize() {
-	plugin.server = grpc.NewServer([]grpc.ServerOption{}...)
-	plugin.health = make(chan *rm.Device)
-	plugin.stop = make(chan interface{})
-}
-
-func (plugin *NvidiaDevicePlugin) cleanup() {
-	close(plugin.stop)
-	plugin.server = nil
-	plugin.health = nil
-	plugin.stop = nil
-}
-
-// Devices returns the full set of devices associated with the plugin.
-func (plugin *NvidiaDevicePlugin) Devices() rm.Devices {
-	return plugin.rm.Devices()
-}
-
-// Start starts the gRPC server, registers the device plugin with the Kubelet,
-// and starts the device healthchecks.
-func (plugin *NvidiaDevicePlugin) Start() error {
-	plugin.initialize()
-
-	err := plugin.Serve()
-	if err != nil {
-		klog.Infof("Could not start device plugin for '%s': %s", plugin.rm.Resource(), err)
-		plugin.cleanup()
-		return err
-	}
-	klog.Infof("Starting to serve '%s' on %s", plugin.rm.Resource(), plugin.socket)
-
-	err = plugin.Register()
-	if err != nil {
-		klog.Infof("Could not register device plugin: %s", err)
-		plugin.Stop()
-		return err
-	}
-	klog.Infof("Registered device plugin for '%s' with Kubelet", plugin.rm.Resource())
-
-	if plugin.operatingMode == "mig" {
-		cmd := exec.Command("nvidia-mig-parted", "export")
-		var stdout, stderr bytes.Buffer
-		cmd.Stdout = &stdout
-		cmd.Stderr = &stderr
-		err := cmd.Run()
-		if err != nil {
-			klog.Fatalf("nvidia-mig-parted failed with %s\n", err)
-		}
-		outStr := stdout.Bytes()
-		yaml.Unmarshal(outStr, &plugin.migCurrent)
-		os.WriteFile("/tmp/migconfig.yaml", outStr, os.ModePerm)
-		if len(plugin.migCurrent.MigConfigs["current"]) == 1 && len(plugin.migCurrent.MigConfigs["current"][0].Devices) == 0 {
-			idx := 0
-			plugin.migCurrent.MigConfigs["current"][0].Devices = make([]int32, 0)
-			for idx < GetDeviceNums() {
-				plugin.migCurrent.MigConfigs["current"][0].Devices = append(plugin.migCurrent.MigConfigs["current"][0].Devices, int32(idx))
-				idx++
-			}
-		}
-		klog.Infoln("Mig export", plugin.migCurrent)
-	}
-	go func() {
-		err := plugin.rm.CheckHealth(plugin.stop, plugin.health)
-		if err != nil {
-			klog.Infof("Failed to start health check: %v; continuing with health checks disabled", err)
-		}
-	}()
-
-	go func() {
-		plugin.WatchAndRegister()
-	}()
-
-	return nil
-}
-
-// Stop stops the gRPC server.
-func (plugin *NvidiaDevicePlugin) Stop() error {
-	if plugin == nil || plugin.server == nil {
-		return nil
-	}
-	klog.Infof("Stopping to serve '%s' on %s", plugin.rm.Resource(), plugin.socket)
-	plugin.server.Stop()
-	if err := os.Remove(plugin.socket); err != nil && !os.IsNotExist(err) {
-		return err
-	}
-	plugin.cleanup()
-	return nil
-}
-
-// Serve starts the gRPC server of the device plugin.
-func (plugin *NvidiaDevicePlugin) Serve() error {
-	os.Remove(plugin.socket)
-	sock, err := net.Listen("unix", plugin.socket)
-	if err != nil {
-		return err
-	}
-
-	kubeletdevicepluginv1beta1.RegisterDevicePluginServer(plugin.server, plugin)
-
-	go func() {
-		lastCrashTime := time.Now()
-		restartCount := 0
-		for {
-			klog.Infof("Starting GRPC server for '%s'", plugin.rm.Resource())
-			err := plugin.server.Serve(sock)
-			if err == nil {
-				break
-			}
-
-			klog.Infof("GRPC server for '%s' crashed with error: %v", plugin.rm.Resource(), err)
-
-			// restart if it has not been too often
-			// i.e. if server has crashed more than 5 times and it didn't last more than one hour each time
-			if restartCount > 5 {
-				// quit
-				klog.Fatalf("GRPC server for '%s' has repeatedly crashed recently. Quitting", plugin.rm.Resource())
-			}
-			timeSinceLastCrash := time.Since(lastCrashTime).Seconds()
-			lastCrashTime = time.Now()
-			if timeSinceLastCrash > 3600 {
-				// it has been one hour since the last crash.. reset the count
-				// to reflect on the frequency
-				restartCount = 1
-			} else {
-				restartCount++
-			}
-		}
-	}()
-
-	// Wait for server to start by launching a blocking connexion
-	conn, err := plugin.dial(plugin.socket, 5*time.Second)
-	if err != nil {
-		return err
-	}
-	conn.Close()
-
-	return nil
-}
-
-// Register registers the device plugin for the given resourceName with Kubelet.
-func (plugin *NvidiaDevicePlugin) Register() error {
-	conn, err := plugin.dial(kubeletdevicepluginv1beta1.KubeletSocket, 5*time.Second)
-	if err != nil {
-		return err
-	}
-	defer conn.Close()
-
-	client := kubeletdevicepluginv1beta1.NewRegistrationClient(conn)
-	reqt := &kubeletdevicepluginv1beta1.RegisterRequest{
-		Version:      kubeletdevicepluginv1beta1.Version,
-		Endpoint:     path.Base(plugin.socket),
-		ResourceName: string(plugin.rm.Resource()),
-		Options: &kubeletdevicepluginv1beta1.DevicePluginOptions{
-			GetPreferredAllocationAvailable: false,
-		},
-	}
-
-	_, err = client.Register(context.Background(), reqt)
-	if err != nil {
-		return err
-	}
-	return nil
-}
-
-// GetDevicePluginOptions returns the values of the optional settings for this plugin
-func (plugin *NvidiaDevicePlugin) GetDevicePluginOptions(context.Context, *kubeletdevicepluginv1beta1.Empty) (*kubeletdevicepluginv1beta1.DevicePluginOptions, error) {
-	options := &kubeletdevicepluginv1beta1.DevicePluginOptions{
-		GetPreferredAllocationAvailable: false,
-	}
-	return options, nil
-}
-
-// ListAndWatch lists devices and update that list according to the health status
-func (plugin *NvidiaDevicePlugin) ListAndWatch(e *kubeletdevicepluginv1beta1.Empty, s kubeletdevicepluginv1beta1.DevicePlugin_ListAndWatchServer) error {
-	s.Send(&kubeletdevicepluginv1beta1.ListAndWatchResponse{Devices: plugin.apiDevices()})
-
-	for {
-		select {
-		case <-plugin.stop:
-			return nil
-		case d := <-plugin.health:
-			// FIXME: there is no way to recover from the Unhealthy state.
-			d.Health = kubeletdevicepluginv1beta1.Unhealthy
-			klog.Infof("'%s' device marked unhealthy: %s", plugin.rm.Resource(), d.ID)
-			s.Send(&kubeletdevicepluginv1beta1.ListAndWatchResponse{Devices: plugin.apiDevices()})
-		}
-	}
-}
-
-// GetPreferredAllocation returns the preferred allocation from the set of devices specified in the request
-func (plugin *NvidiaDevicePlugin) GetPreferredAllocation(ctx context.Context, r *kubeletdevicepluginv1beta1.PreferredAllocationRequest) (*kubeletdevicepluginv1beta1.PreferredAllocationResponse, error) {
-	response := &kubeletdevicepluginv1beta1.PreferredAllocationResponse{}
-	/*for _, req := range r.ContainerRequests {
-		devices, err := plugin.rm.GetPreferredAllocation(req.AvailableDeviceIDs, req.MustIncludeDeviceIDs, int(req.AllocationSize))
-		if err != nil {
-			return nil, fmt.Errorf("error getting list of preferred allocation devices: %v", err)
-		}
-
-		resp := &kubeletdevicepluginv1beta1.ContainerPreferredAllocationResponse{
-			DeviceIDs: devices,
-		}
-
-		response.ContainerResponses = append(response.ContainerResponses, resp)
-	}*/
-	return response, nil
-}
-
-// Allocate which return list of devices.
-func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *kubeletdevicepluginv1beta1.AllocateRequest) (*kubeletdevicepluginv1beta1.AllocateResponse, error) {
-	klog.InfoS("Allocate", "request", reqs)
-	responses := kubeletdevicepluginv1beta1.AllocateResponse{}
-	nodename := os.Getenv(util.NodeNameEnvName)
-	current, err := util.GetPendingPod(ctx, nodename)
-	if err != nil {
-		//nodelock.ReleaseNodeLock(nodename, NodeLockNvidia, current)
-		return &kubeletdevicepluginv1beta1.AllocateResponse{}, err
-	}
-	klog.Infof("Allocate pod name is %s/%s, annotation is %+v", current.Namespace, current.Name, current.Annotations)
-
-	for idx, req := range reqs.ContainerRequests {
-		// If the devices being allocated are replicas, then (conditionally)
-		// error out if more than one resource is being allocated.
-
-		if strings.Contains(req.DevicesIDs[0], "MIG") {
-			if plugin.config.Sharing.TimeSlicing.FailRequestsGreaterThanOne && rm.AnnotatedIDs(req.DevicesIDs).AnyHasAnnotations() {
-				if len(req.DevicesIDs) > 1 {
-					device.PodAllocationFailed(nodename, current, NodeLockNvidia)
-					return nil, fmt.Errorf("request for '%v: %v' too large: maximum request size for shared resources is 1", plugin.rm.Resource(), len(req.DevicesIDs))
-				}
-			}
-
-			for _, id := range req.DevicesIDs {
-				if !plugin.rm.Devices().Contains(id) {
-					device.PodAllocationFailed(nodename, current, NodeLockNvidia)
-					return nil, fmt.Errorf("invalid allocation request for '%s': unknown device: %s", plugin.rm.Resource(), id)
-				}
-			}
-
-			response, err := plugin.getAllocateResponse(req.DevicesIDs)
-			if err != nil {
-				device.PodAllocationFailed(nodename, current, NodeLockNvidia)
-				return nil, fmt.Errorf("failed to get allocate response: %v", err)
-			}
-			responses.ContainerResponses = append(responses.ContainerResponses, response)
-		} else {
-			currentCtr, devreq, err := GetNextDeviceRequest(nvidia.NvidiaGPUDevice, *current)
-			klog.Infoln("deviceAllocateFromAnnotation=", devreq)
-			if err != nil {
-				device.PodAllocationFailed(nodename, current, NodeLockNvidia)
-				return &kubeletdevicepluginv1beta1.AllocateResponse{}, err
-			}
-			if len(devreq) != len(reqs.ContainerRequests[idx].DevicesIDs) {
-				device.PodAllocationFailed(nodename, current, NodeLockNvidia)
-				return &kubeletdevicepluginv1beta1.AllocateResponse{}, errors.New("device number not matched")
-			}
-			response, err := plugin.getAllocateResponse(plugin.GetContainerDeviceStrArray(devreq))
-			if err != nil {
-				return nil, fmt.Errorf("failed to get allocate response: %v", err)
-			}
-
-			err = EraseNextDeviceTypeFromAnnotation(nvidia.NvidiaGPUDevice, *current)
-			if err != nil {
-				device.PodAllocationFailed(nodename, current, NodeLockNvidia)
-				return &kubeletdevicepluginv1beta1.AllocateResponse{}, err
-			}
-
-			if plugin.operatingMode != "mig" {
-				for i, dev := range devreq {
-					limitKey := fmt.Sprintf("CUDA_DEVICE_MEMORY_LIMIT_%v", i)
-					response.Envs[limitKey] = fmt.Sprintf("%vm", dev.Usedmem)
-				}
-				response.Envs["CUDA_DEVICE_SM_LIMIT"] = fmt.Sprint(devreq[0].Usedcores)
-				response.Envs["CUDA_DEVICE_MEMORY_SHARED_CACHE"] = fmt.Sprintf("%s/vgpu/%v.cache", hostHookPath, uuid.New().String())
-				if plugin.schedulerConfig.DeviceMemoryScaling > 1 {
-					response.Envs["CUDA_OVERSUBSCRIBE"] = "true"
-				}
-				if plugin.schedulerConfig.DisableCoreLimit {
-					response.Envs[util.CoreLimitSwitch] = "disable"
-				}
-				cacheFileHostDirectory := fmt.Sprintf("%s/vgpu/containers/%s_%s", hostHookPath, current.UID, currentCtr.Name)
-				os.RemoveAll(cacheFileHostDirectory)
-
-				os.MkdirAll(cacheFileHostDirectory, 0777)
-				os.Chmod(cacheFileHostDirectory, 0777)
-				os.MkdirAll("/tmp/vgpulock", 0777)
-				os.Chmod("/tmp/vgpulock", 0777)
-				response.Mounts = append(response.Mounts,
-					&kubeletdevicepluginv1beta1.Mount{ContainerPath: fmt.Sprintf("%s/vgpu/libvgpu.so", hostHookPath),
-						HostPath: GetLibPath(),
-						ReadOnly: true},
-					&kubeletdevicepluginv1beta1.Mount{ContainerPath: fmt.Sprintf("%s/vgpu", hostHookPath),
-						HostPath: cacheFileHostDirectory,
-						ReadOnly: false},
-					&kubeletdevicepluginv1beta1.Mount{ContainerPath: "/tmp/vgpulock",
-						HostPath: "/tmp/vgpulock",
-						ReadOnly: false},
-				)
-				found := false
-				for _, val := range currentCtr.Env {
-					if strings.Compare(val.Name, "CUDA_DISABLE_CONTROL") == 0 {
-						// if env existed but is set to false or can not be parsed, ignore
-						t, _ := strconv.ParseBool(val.Value)
-						if !t {
-							continue
-						}
-						// only env existed and set to true, we mark it "found"
-						found = true
-						break
-					}
-				}
-				if !found {
-					response.Mounts = append(response.Mounts, &kubeletdevicepluginv1beta1.Mount{ContainerPath: "/etc/ld.so.preload",
-						HostPath: hostHookPath + "/vgpu/ld.so.preload",
-						ReadOnly: true},
-					)
-				}
-				_, err = os.Stat(fmt.Sprintf("%s/vgpu/license", hostHookPath))
-				if err == nil {
-					response.Mounts = append(response.Mounts, &kubeletdevicepluginv1beta1.Mount{
-						ContainerPath: "/tmp/license",
-						HostPath:      fmt.Sprintf("%s/vgpu/license", hostHookPath),
-						ReadOnly:      true,
-					})
-					response.Mounts = append(response.Mounts, &kubeletdevicepluginv1beta1.Mount{
-						ContainerPath: "/usr/bin/vgpuvalidator",
-						HostPath:      fmt.Sprintf("%s/vgpu/vgpuvalidator", hostHookPath),
-						ReadOnly:      true,
-					})
-				}
-			}
-			responses.ContainerResponses = append(responses.ContainerResponses, response)
-		}
-	}
-	klog.Infoln("Allocate Response", responses.ContainerResponses)
-	device.PodAllocationTrySuccess(nodename, nvidia.NvidiaGPUDevice, NodeLockNvidia, current)
-	return &responses, nil
-}
-
-func (plugin *NvidiaDevicePlugin) getAllocateResponse(requestIds []string) (*kubeletdevicepluginv1beta1.ContainerAllocateResponse, error) {
-	deviceIDs := plugin.deviceIDsFromAnnotatedDeviceIDs(requestIds)
-
-	responseID := uuid.New().String()
-	response, err := plugin.getAllocateResponseForCDI(responseID, deviceIDs)
-	if err != nil {
-		return nil, fmt.Errorf("failed to get allocate response for CDI: %v", err)
-	}
-
-	response.Envs = plugin.apiEnvs(plugin.deviceListEnvvar, deviceIDs)
-	//if plugin.deviceListStrategies.Includes(spec.DeviceListStrategyVolumeMounts) || plugin.deviceListStrategies.Includes(spec.DeviceListStrategyEnvvar) {
-	//	response.Envs = plugin.apiEnvs(plugin.deviceListEnvvar, deviceIDs)
-	//}
-	/*
-		if plugin.deviceListStrategies.Includes(spec.DeviceListStrategyVolumeMounts) {
-			response.Envs = plugin.apiEnvs(plugin.deviceListEnvvar, []string{deviceListAsVolumeMountsContainerPathRoot})
-			response.Mounts = plugin.apiMounts(deviceIDs)
-		}*/
-	if *plugin.config.Flags.Plugin.PassDeviceSpecs {
-		response.Devices = plugin.apiDeviceSpecs(*plugin.config.Flags.NvidiaDriverRoot, requestIds)
-	}
-	if *plugin.config.Flags.GDSEnabled {
-		response.Envs["NVIDIA_GDS"] = "enabled"
-	}
-	if *plugin.config.Flags.MOFEDEnabled {
-		response.Envs["NVIDIA_MOFED"] = "enabled"
-	}
-
-	return &response, nil
-}
-
-// getAllocateResponseForCDI returns the allocate response for the specified device IDs.
-// This response contains the annotations required to trigger CDI injection in the container engine or nvidia-container-runtime.
-func (plugin *NvidiaDevicePlugin) getAllocateResponseForCDI(responseID string, deviceIDs []string) (kubeletdevicepluginv1beta1.ContainerAllocateResponse, error) {
-	response := kubeletdevicepluginv1beta1.ContainerAllocateResponse{}
-
-	if !plugin.cdiEnabled {
-		return response, nil
-	}
-
-	var devices []string
-	for _, id := range deviceIDs {
-		devices = append(devices, plugin.cdiHandler.QualifiedName("gpu", id))
-	}
-
-	if *plugin.config.Flags.GDSEnabled {
-		devices = append(devices, plugin.cdiHandler.QualifiedName("gds", "all"))
-	}
-	if *plugin.config.Flags.MOFEDEnabled {
-		devices = append(devices, plugin.cdiHandler.QualifiedName("mofed", "all"))
-	}
-
-	if len(devices) == 0 {
-		return response, nil
-	}
-
-	if plugin.deviceListStrategies.Includes(spec.DeviceListStrategyCDIAnnotations) {
-		annotations, err := plugin.getCDIDeviceAnnotations(responseID, devices)
-		if err != nil {
-			return response, err
-		}
-		response.Annotations = annotations
-	}
-
-	return response, nil
-}
-
-func (plugin *NvidiaDevicePlugin) getCDIDeviceAnnotations(id string, devices []string) (map[string]string, error) {
-	annotations, err := cdiapi.UpdateAnnotations(map[string]string{}, "nvidia-device-plugin", id, devices)
-	if err != nil {
-		return nil, fmt.Errorf("failed to add CDI annotations: %v", err)
-	}
-
-	if plugin.cdiAnnotationPrefix == spec.DefaultCDIAnnotationPrefix {
-		return annotations, nil
-	}
-
-	// update annotations if a custom CDI prefix is configured
-	updatedAnnotations := make(map[string]string)
-	for k, v := range annotations {
-		newKey := plugin.cdiAnnotationPrefix + strings.TrimPrefix(k, spec.DefaultCDIAnnotationPrefix)
-		updatedAnnotations[newKey] = v
-	}
-
-	return updatedAnnotations, nil
-}
-
-// PreStartContainer is unimplemented for this plugin
-func (plugin *NvidiaDevicePlugin) PreStartContainer(context.Context, *kubeletdevicepluginv1beta1.PreStartContainerRequest) (*kubeletdevicepluginv1beta1.PreStartContainerResponse, error) {
-	return &kubeletdevicepluginv1beta1.PreStartContainerResponse{}, nil
-}
-
-// dial establishes the gRPC communication with the registered device plugin.
-func (plugin *NvidiaDevicePlugin) dial(unixSocketPath string, timeout time.Duration) (*grpc.ClientConn, error) {
-	c, err := grpc.Dial(unixSocketPath, grpc.WithInsecure(), grpc.WithBlock(),
-		grpc.WithTimeout(timeout),
-		grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
-			return net.DialTimeout("unix", addr, timeout)
-		}),
-	)
-
-	if err != nil {
-		return nil, err
-	}
-
-	return c, nil
-}
-
-func (plugin *NvidiaDevicePlugin) deviceIDsFromAnnotatedDeviceIDs(ids []string) []string {
-	var deviceIDs []string
-	if *plugin.config.Flags.Plugin.DeviceIDStrategy == spec.DeviceIDStrategyUUID {
-		deviceIDs = rm.AnnotatedIDs(ids).GetIDs()
-	}
-	if *plugin.config.Flags.Plugin.DeviceIDStrategy == spec.DeviceIDStrategyIndex {
-		deviceIDs = plugin.rm.Devices().Subset(ids).GetIndices()
-	}
-	return deviceIDs
-}
-
-func (plugin *NvidiaDevicePlugin) apiDevices() []*kubeletdevicepluginv1beta1.Device {
-	return plugin.rm.Devices().GetPluginDevices(plugin.schedulerConfig.DeviceSplitCount)
-}
-
-func (plugin *NvidiaDevicePlugin) apiEnvs(envvar string, deviceIDs []string) map[string]string {
-	return map[string]string{
-		envvar: strings.Join(deviceIDs, ","),
-	}
-}
-
-func (plugin *NvidiaDevicePlugin) apiDeviceSpecs(driverRoot string, ids []string) []*kubeletdevicepluginv1beta1.DeviceSpec {
-	optional := map[string]bool{
-		"/dev/nvidiactl":        true,
-		"/dev/nvidia-uvm":       true,
-		"/dev/nvidia-uvm-tools": true,
-		"/dev/nvidia-modeset":   true,
-	}
-
-	paths := plugin.rm.GetDevicePaths(ids)
-
-	var specs []*kubeletdevicepluginv1beta1.DeviceSpec
-	for _, p := range paths {
-		if optional[p] {
-			if _, err := os.Stat(p); err != nil {
-				continue
-			}
-		}
-		spec := &kubeletdevicepluginv1beta1.DeviceSpec{
-			ContainerPath: p,
-			HostPath:      filepath.Join(driverRoot, p),
-			Permissions:   "rw",
-		}
-		specs = append(specs, spec)
-	}
-
-	return specs
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util_test.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util_test.go
deleted file mode 100644
index 28a8cdebe..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util_test.go
+++ /dev/null
@@ -1,156 +0,0 @@
-/**
-# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-**/
-
-package plugin
-
-import (
-	"testing"
-
-	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
-	"github.com/Project-HAMi/HAMi/pkg/util"
-)
-
-func TestGenerateMigTemplate(t *testing.T) {
-	sconfig := nvidia.NvidiaConfig{
-		MigGeometriesList: []util.AllowedMigGeometries{
-			{
-				Models: []string{"A30"},
-				Geometries: []util.Geometry{
-					{util.MigTemplate{Name: "1g.6gb", Memory: 6144, Count: 4}},
-					{util.MigTemplate{Name: "2g.12gb", Memory: 12288, Count: 2}},
-					{util.MigTemplate{Name: "4g.24gb", Memory: 24576, Count: 1}},
-				},
-			},
-			{
-				Models: []string{"A100-SXM4-40GB", "A100-40GB-PCIe", "A100-PCIE-40GB", "A100-SXM4-40GB"},
-				Geometries: []util.Geometry{
-					{util.MigTemplate{Name: "1g.5gb", Memory: 5120, Count: 7}},
-					{util.MigTemplate{Name: "2g.10gb", Memory: 10240, Count: 3}},
-					{util.MigTemplate{Name: "1g.5gb", Memory: 5120, Count: 1}},
-					{util.MigTemplate{Name: "3g.20gb", Memory: 20480, Count: 2}},
-					{util.MigTemplate{Name: "7g.40gb", Memory: 40960, Count: 1}},
-				},
-			},
-			{
-				Models: []string{"A100-SXM4-80GB", "A100-80GB-PCIe", "A100-PCIE-80GB"},
-				Geometries: []util.Geometry{
-					{util.MigTemplate{Name: "1g.10gb", Memory: 10240, Count: 7}},
-					{util.MigTemplate{Name: "2g.20gb", Memory: 20480, Count: 3}},
-					{util.MigTemplate{Name: "1g.10gb", Memory: 10240, Count: 1}},
-					{util.MigTemplate{Name: "3g.40gb", Memory: 40960, Count: 2}},
-					{util.MigTemplate{Name: "7g.80gb", Memory: 81920, Count: 1}},
-				},
-			},
-		},
-	}
-
-	plugin := NvidiaDevicePlugin{
-		operatingMode:   "mig",
-		schedulerConfig: sconfig,
-	}
-	plugin.migCurrent = nvidia.MigPartedSpec{
-		Version:    "v1",
-		MigConfigs: make(map[string]nvidia.MigConfigSpecSlice),
-	}
-	plugin.migCurrent.MigConfigs["current"] = nvidia.MigConfigSpecSlice{
-		nvidia.MigConfigSpec{
-			Devices:    []int32{0, 1},
-			MigEnabled: true,
-			MigDevices: make(map[string]int32), // Ensure this map is initialized
-		},
-	}
-
-	testCases := []struct {
-		name          string
-		model         string
-		deviceIdx     int
-		containerDev  util.ContainerDevice
-		expectedPos   int
-		expectedReset bool
-		expectedMig   map[string]int32
-	}{
-		{
-			name:      "2g.10gb template",
-			model:     "A100-SXM4-40GB",
-			deviceIdx: 0,
-			containerDev: util.ContainerDevice{
-				Idx:     0,
-				UUID:    "aaaaabbbb[1-1]",
-				Usedmem: 3000,
-			},
-			expectedPos:   1,
-			expectedReset: true,
-			expectedMig: map[string]int32{
-				"2g.10gb": 3,
-			},
-		},
-		{
-			name:      "1g.5gb template",
-			model:     "A100-SXM4-40GB",
-			deviceIdx: 0,
-			containerDev: util.ContainerDevice{
-				Idx:     0,
-				UUID:    "aaaaabbbb[0-1]",
-				Usedmem: 3000,
-			},
-			expectedPos:   1,
-			expectedReset: true,
-			expectedMig: map[string]int32{
-				"1g.5gb": 7,
-			},
-		},
-		{
-			name:      "no reset needed",
-			model:     "A100-SXM4-40GB",
-			deviceIdx: 0,
-			containerDev: util.ContainerDevice{
-				Idx:     0,
-				UUID:    "aaaaabbbb[0-2]",
-				Usedmem: 3000,
-			},
-			expectedPos:   2,
-			expectedReset: false,
-			expectedMig: map[string]int32{
-				"1g.5gb": 8,
-			},
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			pos, needsreset := plugin.GenerateMigTemplate(tc.model, tc.deviceIdx, tc.containerDev)
-
-			// Check if the position matches the expected value
-			if pos != tc.expectedPos {
-				t.Errorf("expected position %d, got %d", tc.expectedPos, pos)
-			}
-
-			// Check if the reset flag matches the expected value
-			if needsreset != tc.expectedReset {
-				t.Errorf("expected reset %v, got %v", tc.expectedReset, needsreset)
-			}
-
-			// Check if the mig devices match the expected values
-			migDevices := plugin.migCurrent.MigConfigs["current"][0].MigDevices
-			for k, v := range tc.expectedMig {
-				actual, ok := migDevices[k]
-				if !ok || actual != v {
-					t.Errorf("expected %s count %d, got %d", k, v, actual)
-				}
-			}
-		})
-	}
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/allocate.go b/pkg/device-plugin/nvidiadevice/nvinternal/rm/allocate.go
deleted file mode 100644
index d83cfb2ee..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/allocate.go
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package rm
-
-import (
-	"fmt"
-	"sort"
-
-	"github.com/NVIDIA/go-gpuallocator/gpuallocator"
-)
-
-var alignedAllocationPolicy = gpuallocator.NewBestEffortPolicy()
-
-// getPreferredAllocation runs an allocation algorithm over the inputs.
-// The algorithm chosen is based both on the incoming set of available devices and various config settings.
-func (r *resourceManager) getPreferredAllocation(available, required []string, size int) ([]string, error) {
-	// If all of the available devices are full GPUs without replicas, then
-	// calculate an aligned allocation across those devices.
-	if r.Devices().AlignedAllocationSupported() && !AnnotatedIDs(available).AnyHasAnnotations() {
-		return r.alignedAlloc(available, required, size)
-	}
-
-	// Otherwise, distribute them evenly across all replicated GPUs
-	return r.distributedAlloc(available, required, size)
-}
-
-// alignedAlloc shells out to the alignedAllocationPolicy that is set in
-// order to calculate the preferred allocation.
-func (r *resourceManager) alignedAlloc(available, required []string, size int) ([]string, error) {
-	var devices []string
-
-	availableDevices, err := gpuallocator.NewDevicesFrom(available)
-	if err != nil {
-		return nil, fmt.Errorf("unable to retrieve list of available devices: %v", err)
-	}
-
-	requiredDevices, err := gpuallocator.NewDevicesFrom(required)
-	if err != nil {
-		return nil, fmt.Errorf("unable to retrieve list of required devices: %v", err)
-	}
-
-	allocatedDevices := alignedAllocationPolicy.Allocate(availableDevices, requiredDevices, size)
-
-	for _, device := range allocatedDevices {
-		devices = append(devices, device.UUID)
-	}
-
-	return devices, nil
-}
-
-// distributedAlloc returns a list of devices such that any replicated
-// devices are distributed across all replicated GPUs equally. It takes into
-// account already allocated replicas to ensure a proper balance across them.
-func (r *resourceManager) distributedAlloc(available, required []string, size int) ([]string, error) {
-	// Get the set of candidate devices as the difference between available and required.
-	candidates := r.devices.Subset(available).Difference(r.devices.Subset(required)).GetIDs()
-	needed := size - len(required)
-
-	if len(candidates) < needed {
-		return nil, fmt.Errorf("not enough available devices to satisfy allocation")
-	}
-
-	// For each candidate device, build a mapping of (stripped) device ID to
-	// total / available replicas for that device.
-	replicas := make(map[string]*struct{ total, available int })
-	for _, c := range candidates {
-		id := AnnotatedID(c).GetID()
-		if _, exists := replicas[id]; !exists {
-			replicas[id] = &struct{ total, available int }{}
-		}
-		replicas[id].available++
-	}
-	for d := range r.devices {
-		id := AnnotatedID(d).GetID()
-		if _, exists := replicas[id]; !exists {
-			continue
-		}
-		replicas[id].total++
-	}
-
-	// Grab the set of 'needed' devices one-by-one from the candidates list.
-	// Before selecting each candidate, first sort the candidate list using the
-	// replicas map above. After sorting, the first element in the list will
-	// contain the device with the least difference between total and available
-	// replications (based on what's already been allocated). Add this device
-	// to the list of devices to allocate, remove it from the candidate list,
-	// down its available count in the replicas map, and repeat.
-	var devices []string
-	for i := 0; i < needed; i++ {
-		sort.Slice(candidates, func(i, j int) bool {
-			iid := AnnotatedID(candidates[i]).GetID()
-			jid := AnnotatedID(candidates[j]).GetID()
-			idiff := replicas[iid].total - replicas[iid].available
-			jdiff := replicas[jid].total - replicas[jid].available
-			return idiff < jdiff
-		})
-		id := AnnotatedID(candidates[0]).GetID()
-		replicas[id].available--
-		devices = append(devices, candidates[0])
-		candidates = candidates[1:]
-	}
-
-	// Add the set of required devices to this list and return it.
-	devices = append(required, devices...)
-
-	return devices, nil
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/device_map_test.go b/pkg/device-plugin/nvidiadevice/nvinternal/rm/device_map_test.go
deleted file mode 100644
index c65147443..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/device_map_test.go
+++ /dev/null
@@ -1,583 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package rm
-
-import (
-	"fmt"
-	"testing"
-
-	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
-
-	spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
-	"github.com/stretchr/testify/require"
-	kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
-)
-
-func TestDeviceMapInsert(t *testing.T) {
-	device0 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "0"}}
-	device0withIndex := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "0"}, Index: "index"}
-	device1 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "1"}}
-
-	testCases := []struct {
-		description       string
-		deviceMap         DeviceMap
-		key               string
-		value             *Device
-		expectedDeviceMap DeviceMap
-	}{
-		{
-			description: "insert into empty map",
-			deviceMap:   make(DeviceMap),
-			key:         "resource",
-			value:       &device0,
-			expectedDeviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0,
-				},
-			},
-		},
-		{
-			description: "add to existing resource",
-			deviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0,
-				},
-			},
-			key:   "resource",
-			value: &device1,
-			expectedDeviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0,
-					"1": &device1,
-				},
-			},
-		},
-		{
-			description: "add new resource",
-			deviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0,
-				},
-			},
-			key:   "resource1",
-			value: &device0,
-			expectedDeviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0,
-				},
-				"resource1": Devices{
-					"0": &device0,
-				},
-			},
-		},
-		{
-			description: "overwrite existing device",
-			deviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0,
-				},
-			},
-			key:   "resource",
-			value: &device0withIndex,
-			expectedDeviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0withIndex,
-				},
-			},
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.description, func(t *testing.T) {
-			tc.deviceMap.insert(spec.ResourceName(tc.key), tc.value)
-
-			require.EqualValues(t, tc.expectedDeviceMap, tc.deviceMap)
-		})
-	}
-}
-
-func TestUpdateDeviceMapWithReplicas(t *testing.T) {
-	device0 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "0"}, Index: "0"}
-	device1 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "1"}}
-	device2 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "2"}}
-	device3 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "3"}}
-
-	testCases := []struct {
-		description       string
-		config            *nvidia.DeviceConfig
-		devices           DeviceMap
-		expectedDeviceMap DeviceMap
-	}{
-		{
-			description: "Update device map with replicas",
-			config: &nvidia.DeviceConfig{
-				Config: &spec.Config{
-					Sharing: spec.Sharing{
-						TimeSlicing: spec.ReplicatedResources{
-							Resources: []spec.ReplicatedResource{
-								{
-									Name:     "resource1",
-									Replicas: 2,
-									Rename:   "replicated-resource1",
-									Devices: spec.ReplicatedDevices{
-										All: true,
-									},
-								},
-								{
-									Name:     "resource2",
-									Replicas: 1,
-									Devices: spec.ReplicatedDevices{
-										All: true,
-									},
-								},
-							},
-						},
-					},
-				},
-			},
-			devices: DeviceMap{
-				"resource1": Devices{
-					"0": &device0,
-					"1": &device1,
-				},
-				"resource2": Devices{
-					"2": &device2,
-				},
-				"resource3": Devices{
-					"3": &device3,
-				},
-			},
-			expectedDeviceMap: DeviceMap{
-				"replicated-resource1": Devices{
-					"0::0": &Device{Device: kubeletdevicepluginv1beta1.Device{ID: "0::0"}, Index: "0"},
-					"0::1": &Device{Device: kubeletdevicepluginv1beta1.Device{ID: "0::1"}, Index: "0"},
-					"1::0": &Device{Device: kubeletdevicepluginv1beta1.Device{ID: "1::0"}},
-					"1::1": &Device{Device: kubeletdevicepluginv1beta1.Device{ID: "1::1"}},
-				},
-				"resource2": Devices{
-					"2::0": &Device{Device: kubeletdevicepluginv1beta1.Device{ID: "2::0"}},
-				},
-				"resource3": Devices{
-					"3": &device3,
-				},
-			},
-		},
-		{
-			description: "Some devices are not replicated",
-			config: &nvidia.DeviceConfig{
-				Config: &spec.Config{
-					Sharing: spec.Sharing{
-						TimeSlicing: spec.ReplicatedResources{
-							Resources: []spec.ReplicatedResource{
-								{
-									Name:     "resource1",
-									Replicas: 2,
-									Rename:   "replicated-resource1",
-									Devices: spec.ReplicatedDevices{
-										List: []spec.ReplicatedDeviceRef{"0"}, // only replicate index 0
-									},
-								},
-							},
-						},
-					},
-				},
-			},
-			devices: DeviceMap{
-				"resource1": Devices{
-					"0": &device0,
-					"1": &device1,
-				},
-			},
-			expectedDeviceMap: DeviceMap{
-				"replicated-resource1": Devices{
-					"0::0": &Device{Device: kubeletdevicepluginv1beta1.Device{ID: "0::0"}, Index: "0"},
-					"0::1": &Device{Device: kubeletdevicepluginv1beta1.Device{ID: "0::1"}, Index: "0"},
-				},
-				"resource1": Devices{
-					"1": &device1,
-				},
-			},
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.description, func(t *testing.T) {
-			devices, _ := updateDeviceMapWithReplicas(tc.config, tc.devices)
-			require.EqualValues(t, tc.expectedDeviceMap, devices)
-		})
-	}
-}
-
-func TestDeviceMapMerge(t *testing.T) {
-	device0 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "0"}}
-	device1 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "1"}}
-	device2 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "2"}}
-	device0Updated := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "0"}, Index: "updated"}
-
-	testCases := []struct {
-		description       string
-		deviceMap         DeviceMap
-		otherDeviceMap    DeviceMap
-		expectedDeviceMap DeviceMap
-	}{
-		{
-			description:    "merge into empty map",
-			deviceMap:      make(DeviceMap),
-			otherDeviceMap: DeviceMap{"resource": Devices{"0": &device0}},
-			expectedDeviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0,
-				},
-			},
-		},
-		{
-			description: "merge from empty map",
-			deviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0,
-				},
-			},
-			otherDeviceMap: make(DeviceMap),
-			expectedDeviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0,
-				},
-			},
-		},
-		{
-			description: "merge with overlapping keys",
-			deviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0,
-				},
-			},
-			otherDeviceMap: DeviceMap{
-				"resource": Devices{
-					"1": &device1,
-				},
-			},
-			expectedDeviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0,
-					"1": &device1,
-				},
-			},
-		},
-		{
-			description: "merge with device ID conflict (overwrite existing device)",
-			deviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0,
-				},
-			},
-			otherDeviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0Updated,
-				},
-			},
-			expectedDeviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0Updated,
-				},
-			},
-		},
-		{
-			description: "merge with new resource",
-			deviceMap: DeviceMap{
-				"resource1": Devices{
-					"0": &device0,
-				},
-			},
-			otherDeviceMap: DeviceMap{
-				"resource2": Devices{
-					"1": &device1,
-				},
-			},
-			expectedDeviceMap: DeviceMap{
-				"resource1": Devices{
-					"0": &device0,
-				},
-				"resource2": Devices{
-					"1": &device1,
-				},
-			},
-		},
-		{
-			description: "merge with multiple devices and resources",
-			deviceMap: DeviceMap{
-				"resource1": Devices{
-					"0": &device0,
-				},
-			},
-			otherDeviceMap: DeviceMap{
-				"resource1": Devices{
-					"1": &device1,
-				},
-				"resource2": Devices{
-					"2": &device2,
-				},
-			},
-			expectedDeviceMap: DeviceMap{
-				"resource1": Devices{
-					"0": &device0,
-					"1": &device1,
-				},
-				"resource2": Devices{
-					"2": &device2,
-				},
-			},
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.description, func(t *testing.T) {
-			tc.deviceMap.merge(tc.otherDeviceMap)
-
-			require.EqualValues(t, tc.expectedDeviceMap, tc.deviceMap)
-		})
-	}
-}
-
-func TestDeviceMapIsEmpty(t *testing.T) {
-	device0 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "0"}}
-
-	testCases := []struct {
-		description string
-		deviceMap   DeviceMap
-		expected    bool
-	}{
-		{
-			description: "empty map",
-			deviceMap:   make(DeviceMap),
-			expected:    true,
-		},
-		{
-			description: "map with empty resource",
-			deviceMap: DeviceMap{
-				"resource": Devices{},
-			},
-			expected: true,
-		},
-		{
-			description: "map with non-empty resource",
-			deviceMap: DeviceMap{
-				"resource": Devices{
-					"0": &device0,
-				},
-			},
-			expected: false,
-		},
-		{
-			description: "map with multiple empty resources",
-			deviceMap: DeviceMap{
-				"resource1": Devices{},
-				"resource2": Devices{},
-			},
-			expected: true,
-		},
-		{
-			description: "map with multiple resources, one non-empty",
-			deviceMap: DeviceMap{
-				"resource1": Devices{},
-				"resource2": Devices{
-					"0": &device0,
-				},
-			},
-			expected: false,
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.description, func(t *testing.T) {
-			actual := tc.deviceMap.isEmpty()
-
-			require.Equal(t, tc.expected, actual)
-		})
-	}
-}
-
-func TestDeviceMapGetIDsOfDevicesToReplicate(t *testing.T) {
-	device0 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "0"}, Index: "0"}
-	device1 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "1"}, Index: "1"}
-	device2 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "2"}, Index: "2"}
-	device3 := Device{Device: kubeletdevicepluginv1beta1.Device{ID: "3"}, Index: "3"}
-
-	deviceMap := DeviceMap{
-		"resource1": Devices{
-			"0": &device0,
-			"1": &device1,
-			"2": &device2,
-			"GPU-8dcd427f-483b-b48f-d7e5-75fb19a52b76": &device3,
-		},
-	}
-
-	testCases := []struct {
-		description string
-		deviceMap   DeviceMap
-		resource    *spec.ReplicatedResource
-		expectedIDs []string
-		expectedErr error
-	}{
-		{
-			description: "resource does not exist",
-			deviceMap:   deviceMap,
-			resource: &spec.ReplicatedResource{
-				Name:    "nonexistent_resource",
-				Devices: spec.ReplicatedDevices{},
-			},
-			expectedIDs: nil,
-			expectedErr: nil,
-		},
-		{
-			description: "replicate all devices",
-			deviceMap:   deviceMap,
-			resource: &spec.ReplicatedResource{
-				Name: "resource1",
-				Devices: spec.ReplicatedDevices{
-					All: true,
-				},
-			},
-			expectedIDs: []string{"0", "1", "2", "3"},
-			expectedErr: nil,
-		},
-		{
-			description: "replicate specific count of devices (count exceeds available)",
-			deviceMap:   deviceMap,
-			resource: &spec.ReplicatedResource{
-				Name: "resource1",
-				Devices: spec.ReplicatedDevices{
-					Count: 5,
-				},
-			},
-			expectedIDs: nil,
-			expectedErr: fmt.Errorf("requested 5 devices to be replicated, but only 4 devices available"),
-		},
-		{
-			description: "replicate specific devices by ID (valid)",
-			deviceMap:   deviceMap,
-			resource: &spec.ReplicatedResource{
-				Name: "resource1",
-				Devices: spec.ReplicatedDevices{
-					List: []spec.ReplicatedDeviceRef{
-						spec.ReplicatedDeviceRef("GPU-8dcd427f-483b-b48f-d7e5-75fb19a52b76"), // ref UUID
-					},
-				},
-			},
-			expectedIDs: []string{"3"},
-			expectedErr: nil,
-		},
-		{
-			description: "replicate specific devices by ID (invalid ID)",
-			deviceMap:   deviceMap,
-			resource: &spec.ReplicatedResource{
-				Name: "resource1",
-				Devices: spec.ReplicatedDevices{
-					List: []spec.ReplicatedDeviceRef{
-						spec.ReplicatedDeviceRef("GPU-8dcd427f-483b-b48f-d7e5-75fb19a52b77"), // Nonexistent device
-					},
-				},
-			},
-			expectedIDs: nil,
-			expectedErr: fmt.Errorf("no matching device with UUID: GPU-8dcd427f-483b-b48f-d7e5-75fb19a52b77"),
-		},
-		{
-			description: "replicate specific devices by GPU index (valid)",
-			deviceMap:   deviceMap,
-			resource: &spec.ReplicatedResource{
-				Name: "resource1",
-				Devices: spec.ReplicatedDevices{
-					List: []spec.ReplicatedDeviceRef{
-						spec.ReplicatedDeviceRef("0"), // Index: "0"
-						spec.ReplicatedDeviceRef("1"), // Index: "1"
-					},
-				},
-			},
-			expectedIDs: []string{"0", "1"},
-			expectedErr: nil,
-		},
-		{
-			description: "replicate specific devices by GPU index (invalid)",
-			deviceMap:   deviceMap,
-			resource: &spec.ReplicatedResource{
-				Name: "resource1",
-				Devices: spec.ReplicatedDevices{
-					List: []spec.ReplicatedDeviceRef{
-						spec.ReplicatedDeviceRef("0"), // Index: "0"
-						spec.ReplicatedDeviceRef("4"), // Nonexistent Index
-					},
-				},
-			},
-			expectedIDs: nil,
-			expectedErr: fmt.Errorf("no matching device at index: 4"),
-		},
-		{
-			description: "invalid replicated devices",
-			deviceMap:   deviceMap,
-			resource: &spec.ReplicatedResource{
-				Name: "resource1",
-				Devices: spec.ReplicatedDevices{
-					List: []spec.ReplicatedDeviceRef{
-						spec.ReplicatedDeviceRef("invalid_index"), // Invalid gpu
-					},
-				},
-			},
-			expectedIDs: nil,
-			expectedErr: nil,
-		},
-		{
-			description: "unexpected error (no replication criteria provided)",
-			deviceMap:   deviceMap,
-			resource: &spec.ReplicatedResource{
-				Name:    "resource1",
-				Devices: spec.ReplicatedDevices{},
-			},
-			expectedIDs: nil,
-			expectedErr: fmt.Errorf("unexpected error"),
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.description, func(t *testing.T) {
-			ids, err := tc.deviceMap.getIDsOfDevicesToReplicate(tc.resource)
-
-			if tc.expectedErr != nil {
-				require.Error(t, err)
-				require.EqualError(t, err, tc.expectedErr.Error())
-			} else {
-				require.NoError(t, err)
-			}
-
-			require.ElementsMatch(t, tc.expectedIDs, ids)
-		})
-	}
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/health_test.go b/pkg/device-plugin/nvidiadevice/nvinternal/rm/health_test.go
deleted file mode 100644
index 5818839f9..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/health_test.go
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package rm
-
-import (
-	"testing"
-
-	"github.com/stretchr/testify/require"
-)
-
-func TestGetAdditionalXids(t *testing.T) {
-	testCases := []struct {
-		description string
-		input       string
-		expected    []uint64
-	}{
-		{
-			description: "Empty input",
-		},
-		{
-			description: "Only comma",
-			input:       ",",
-		},
-		{
-			description: "Non-integer input",
-			input:       "not-an-int",
-		},
-		{
-			description: "Single integer",
-			input:       "68",
-			expected:    []uint64{68},
-		},
-		{
-			description: "Negative integer",
-			input:       "-68",
-		},
-		{
-			description: "Single integer with trailing spaces",
-			input:       "68  ",
-			expected:    []uint64{68},
-		},
-		{
-			description: "Single integer followed by comma without trailing number",
-			input:       "68,",
-			expected:    []uint64{68},
-		},
-		{
-			description: "Comma without preceding number followed by single integer",
-			input:       ",68",
-			expected:    []uint64{68},
-		},
-		{
-			description: "Two comma-separated integers",
-			input:       "68,67",
-			expected:    []uint64{68, 67},
-		},
-		{
-			description: "Two integers separated by non-integer",
-			input:       "68,not-an-int,67",
-			expected:    []uint64{68, 67},
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.description, func(t *testing.T) {
-			xids := getAdditionalXids(tc.input)
-			require.EqualValues(t, tc.expected, xids)
-		})
-	}
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/helper.go b/pkg/device-plugin/nvidiadevice/nvinternal/rm/helper.go
deleted file mode 100644
index 4c3ff3c65..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/helper.go
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package rm
-
-// int8Slice wraps an []int8 with more functions.
-type int8Slice []int8
-
-// String turns a nil terminated int8Slice into a string
-func (s int8Slice) String() string {
-	var b []byte
-	for _, c := range s {
-		if c == 0 {
-			break
-		}
-		b = append(b, byte(c))
-	}
-	return string(b)
-}
-
-// uintPtr returns a *uint from a uint32
-func uintPtr(c uint32) *uint {
-	i := uint(c)
-	return &i
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_devices_test.go b/pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_devices_test.go
deleted file mode 100644
index 921af2f6d..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_devices_test.go
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package rm
-
-import (
-	"fmt"
-	"testing"
-
-	"github.com/NVIDIA/go-nvlib/pkg/nvml"
-	//"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/rm"
-	"github.com/stretchr/testify/require"
-)
-
-// Test GetUUID for nvmlDevice
-func TestNvmlDevice_GetUUID(t *testing.T) {
-	testCases := []struct {
-		description   string
-		nvmlDevice    nvml.Device
-		expectedUUID  string
-		expectedError error
-	}{
-		{
-			description: "Successful UUID retrieval",
-			nvmlDevice: &nvml.DeviceMock{
-				GetUUIDFunc: func() (string, nvml.Return) {
-					return "GPU-12345", nvml.SUCCESS
-				},
-			},
-			expectedUUID:  "GPU-12345",
-			expectedError: nil,
-		},
-		{
-			description: "Error retrieving UUID",
-			nvmlDevice: &nvml.DeviceMock{
-				GetUUIDFunc: func() (string, nvml.Return) {
-					return "GPU-12345", nvml.ERROR_UNKNOWN
-				},
-			},
-			expectedUUID:  "",
-			expectedError: nvml.ERROR_UNKNOWN,
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.description, func(t *testing.T) {
-			device := nvmlDevice{Device: tc.nvmlDevice}
-			uuid, err := device.GetUUID()
-
-			if tc.expectedError == nil {
-				require.NoError(t, err)
-			} else {
-				require.EqualError(t, err, tc.expectedError.Error())
-			}
-			require.Equal(t, tc.expectedUUID, uuid)
-		})
-	}
-}
-
-func TestNvmlDevice_GetPaths(t *testing.T) {
-	testCases := []struct {
-		description   string
-		nvmlDevice    nvml.Device
-		expectedPaths []string
-		expectedError error
-	}{
-		{
-			description: "Successful path retrieval",
-			nvmlDevice: &nvml.DeviceMock{
-				GetMinorNumberFunc: func() (int, nvml.Return) {
-					return 0, nvml.SUCCESS
-				},
-			},
-			expectedPaths: []string{"/dev/nvidia0"},
-			expectedError: nil,
-		},
-		{
-			description: "Error retrieving UUID",
-			nvmlDevice: &nvml.DeviceMock{
-				GetMinorNumberFunc: func() (int, nvml.Return) {
-					return 0, nvml.ERROR_UNKNOWN
-				},
-			},
-			expectedPaths: nil,
-			expectedError: fmt.Errorf("error getting GPU device minor number: %v", nvml.ERROR_UNKNOWN),
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.description, func(t *testing.T) {
-			device := nvmlDevice{Device: tc.nvmlDevice}
-			paths, err := device.GetPaths()
-
-			if tc.expectedError == nil {
-				require.NoError(t, err)
-			} else {
-				require.Contains(t, err.Error(), nvml.ERROR_UNKNOWN.Error())
-			}
-			require.Equal(t, tc.expectedPaths, paths)
-		})
-	}
-}
-
-func TestNvmlDevice_GetNumaNode(t *testing.T) {
-	testCases := []struct {
-		description     string
-		nvmlDevice      nvml.Device
-		expectedHasNode bool
-		expectedNode    int
-		expectedError   error
-	}{
-		{
-			description: "No NUMA node",
-			nvmlDevice: &nvml.DeviceMock{
-				GetPciInfoFunc: func() (nvml.PciInfo, nvml.Return) {
-					return nvml.PciInfo{BusId: [32]int8{'0', '0', '0', '0', ':', '0', '2', ':', '0', '0', '.', '0', 0, 0, 0, 0}}, nvml.SUCCESS
-				},
-			},
-			expectedHasNode: false,
-			expectedNode:    0,
-			expectedError:   nil,
-		},
-		{
-			description: "Error getting PCI info",
-			nvmlDevice: &nvml.DeviceMock{
-				GetPciInfoFunc: func() (nvml.PciInfo, nvml.Return) {
-					return nvml.PciInfo{}, nvml.ERROR_UNKNOWN
-				},
-			},
-			expectedHasNode: false,
-			expectedNode:    0,
-			expectedError:   nvml.ERROR_UNKNOWN,
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.description, func(t *testing.T) {
-			device := nvmlDevice{Device: tc.nvmlDevice}
-			hasNode, node, err := device.GetNumaNode()
-
-			if tc.expectedError == nil {
-				require.NoError(t, err)
-			} else {
-				require.Contains(t, err.Error(), tc.expectedError.Error())
-			}
-			require.Equal(t, tc.expectedHasNode, hasNode)
-			require.Equal(t, tc.expectedNode, node)
-		})
-	}
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_manager.go b/pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_manager.go
deleted file mode 100644
index ee5048348..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_manager.go
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package rm
-
-import (
-	"fmt"
-
-	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
-
-	"github.com/NVIDIA/go-nvlib/pkg/nvml"
-	"k8s.io/klog/v2"
-)
-
-type nvmlResourceManager struct {
-	resourceManager
-	nvml nvml.Interface
-}
-
-var _ ResourceManager = (*nvmlResourceManager)(nil)
-
-// NewNVMLResourceManagers returns a set of ResourceManagers, one for each NVML resource in 'config'.
-func NewNVMLResourceManagers(nvmllib nvml.Interface, config *nvidia.DeviceConfig) ([]ResourceManager, error) {
-	ret := nvmllib.Init()
-	if ret != nvml.SUCCESS {
-		return nil, fmt.Errorf("failed to initialize NVML: %v", ret)
-	}
-	defer func() {
-		ret := nvmllib.Shutdown()
-		if ret != nvml.SUCCESS {
-			klog.Infof("Error shutting down NVML: %v", ret)
-		}
-	}()
-
-	deviceMap, err := NewDeviceMap(nvmllib, config)
-	if err != nil {
-		return nil, fmt.Errorf("error building device map: %v", err)
-	}
-
-	var rms []ResourceManager
-	for resourceName, devices := range deviceMap {
-		if len(devices) == 0 {
-			continue
-		}
-		for key, value := range devices {
-			if nvidia.FilterDeviceToRegister(value.ID, value.Index) {
-				klog.V(5).InfoS("Filtering device", "device", value.ID)
-				delete(devices, key)
-				continue
-			}
-		}
-		r := &nvmlResourceManager{
-			resourceManager: resourceManager{
-				config:   config,
-				resource: resourceName,
-				devices:  devices,
-			},
-			nvml: nvmllib,
-		}
-		rms = append(rms, r)
-	}
-
-	return rms, nil
-}
-
-// GetPreferredAllocation runs an allocation algorithm over the inputs.
-// The algorithm chosen is based both on the incoming set of available devices and various config settings.
-func (r *nvmlResourceManager) GetPreferredAllocation(available, required []string, size int) ([]string, error) {
-	return r.getPreferredAllocation(available, required, size)
-}
-
-// GetDevicePaths returns the required and optional device nodes for the requested resources
-func (r *nvmlResourceManager) GetDevicePaths(ids []string) []string {
-	paths := []string{
-		"/dev/nvidiactl",
-		"/dev/nvidia-uvm",
-		"/dev/nvidia-uvm-tools",
-		"/dev/nvidia-modeset",
-	}
-
-	for _, p := range r.Devices().Subset(ids).GetPaths() {
-		paths = append(paths, p)
-	}
-
-	return paths
-}
-
-// CheckHealth performs health checks on a set of devices, writing to the 'unhealthy' channel with any unhealthy devices
-func (r *nvmlResourceManager) CheckHealth(stop <-chan interface{}, unhealthy chan<- *Device) error {
-	return r.checkHealth(stop, r.devices, unhealthy)
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/rm.go b/pkg/device-plugin/nvidiadevice/nvinternal/rm/rm.go
deleted file mode 100644
index 0c30bbf54..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/rm.go
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package rm
-
-import (
-	"fmt"
-	"strings"
-
-	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
-	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
-	"github.com/NVIDIA/go-nvlib/pkg/nvml"
-	spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
-	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
-	"k8s.io/klog/v2"
-)
-
-// resourceManager forms the base type for specific resource manager implementations
-type resourceManager struct {
-	config   *nvidia.DeviceConfig
-	resource spec.ResourceName
-	devices  Devices
-}
-
-// ResourceManager provides an interface for listing a set of Devices and checking health on them
-type ResourceManager interface {
-	Resource() spec.ResourceName
-	Devices() Devices
-	GetDevicePaths([]string) []string
-	GetPreferredAllocation(available, required []string, size int) ([]string, error)
-	CheckHealth(stop <-chan interface{}, unhealthy chan<- *Device) error
-}
-
-// NewResourceManagers returns a []ResourceManager, one for each resource in 'config'.
-func NewResourceManagers(nvmllib nvml.Interface, config *nvidia.DeviceConfig) ([]ResourceManager, error) {
-	// logWithReason logs the output of the has* / is* checks from the info.Interface
-	logWithReason := func(f func() (bool, string), tag string) bool {
-		is, reason := f()
-		if !is {
-			tag = "non-" + tag
-		}
-		klog.Infof("Detected %v platform: %v", tag, reason)
-		return is
-	}
-
-	infolib := info.New()
-
-	hasNVML := logWithReason(infolib.HasNvml, "NVML")
-	isTegra := logWithReason(infolib.IsTegraSystem, "Tegra")
-
-	if !hasNVML && !isTegra {
-		klog.Error("Incompatible platform detected")
-		klog.Error("If this is a GPU node, did you configure the NVIDIA Container Toolkit?")
-		klog.Error("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites")
-		klog.Error("You can learn how to set the runtime at: https://github.com/NVIDIA/k8s-device-plugin#quick-start")
-		klog.Error("If this is not a GPU node, you should set up a toleration or nodeSelector to only deploy this plugin on GPU nodes")
-		if *config.Flags.FailOnInitError {
-			return nil, fmt.Errorf("platform detection failed")
-		}
-		return nil, nil
-	}
-
-	// The NVIDIA container stack does not yet support the use of integrated AND discrete GPUs on the same node.
-	if hasNVML && isTegra {
-		klog.Warning("Disabling Tegra-based resources on NVML system")
-		isTegra = false
-	}
-
-	var resourceManagers []ResourceManager
-
-	if hasNVML {
-		nvmlManagers, err := NewNVMLResourceManagers(nvmllib, config)
-		if err != nil {
-			return nil, fmt.Errorf("failed to construct NVML resource managers: %v", err)
-		}
-		resourceManagers = append(resourceManagers, nvmlManagers...)
-	}
-
-	if isTegra {
-		tegraManagers, err := NewTegraResourceManagers(config)
-		if err != nil {
-			return nil, fmt.Errorf("failed to construct Tegra resource managers: %v", err)
-		}
-		resourceManagers = append(resourceManagers, tegraManagers...)
-	}
-
-	return resourceManagers, nil
-}
-
-// Resource gets the resource name associated with the ResourceManager
-func (r *resourceManager) Resource() spec.ResourceName {
-	return r.resource
-}
-
-// Resource gets the devices managed by the ResourceManager
-func (r *resourceManager) Devices() Devices {
-	return r.devices
-}
-
-// AddDefaultResourcesToConfig adds default resource matching rules to config.Resources
-func AddDefaultResourcesToConfig(config *nvidia.DeviceConfig) error {
-	//config.Resources.AddGPUResource("*", "gpu")
-	config.Resources.GPUs = append(config.Resources.GPUs, spec.Resource{
-		Pattern: "*",
-		Name:    spec.ResourceName(*config.ResourceName),
-	})
-	fmt.Println("config=", config.Resources.GPUs)
-	switch *config.Flags.MigStrategy {
-	case spec.MigStrategySingle:
-		return config.Resources.AddMIGResource("*", "gpu")
-	case spec.MigStrategyMixed:
-		hasNVML, reason := info.New().HasNvml()
-		if !hasNVML {
-			klog.Warningf("mig-strategy=%q is only supported with NVML", spec.MigStrategyMixed)
-			klog.Warningf("NVML not detected: %v", reason)
-			return nil
-		}
-
-		nvmllib := nvml.New()
-		ret := nvmllib.Init()
-		if ret != nvml.SUCCESS {
-			if *config.Flags.FailOnInitError {
-				return fmt.Errorf("failed to initialize NVML: %v", ret)
-			}
-			return nil
-		}
-		defer func() {
-			ret := nvmllib.Shutdown()
-			if ret != nvml.SUCCESS {
-				klog.Errorf("Error shutting down NVML: %v", ret)
-			}
-		}()
-
-		devicelib := device.New(
-			device.WithNvml(nvmllib),
-		)
-		return devicelib.VisitMigProfiles(func(p device.MigProfile) error {
-			profileInfo := p.GetInfo()
-			if profileInfo.C != profileInfo.G {
-				return nil
-			}
-			resourceName := strings.ReplaceAll("mig-"+p.String(), "+", ".")
-			return config.Resources.AddMIGResource(p.String(), resourceName)
-		})
-	}
-	return nil
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/wsl_devices.go b/pkg/device-plugin/nvidiadevice/nvinternal/rm/wsl_devices.go
deleted file mode 100644
index 7696a3038..000000000
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/wsl_devices.go
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
-
-package rm
-
-type wslDevice nvmlDevice
-
-var _ deviceInfo = (*wslDevice)(nil)
-
-// GetUUID returns the UUID of the device
-func (d wslDevice) GetUUID() (string, error) {
-	return nvmlDevice(d).GetUUID()
-}
-
-// GetPaths returns the paths for a tegra device.
-func (d wslDevice) GetPaths() ([]string, error) {
-	return []string{"/dev/dxg"}, nil
-}
-
-// GetNumaNode returns the NUMA node associated with the GPU device
-func (d wslDevice) GetNumaNode() (bool, int, error) {
-	return nvmlDevice(d).GetNumaNode()
-}
diff --git a/pkg/device/nvidia/device.go b/pkg/device/nvidia/device.go
index 2d962e9b3..664399689 100644
--- a/pkg/device/nvidia/device.go
+++ b/pkg/device/nvidia/device.go
@@ -23,13 +23,13 @@ import (
 	"strconv"
 	"strings"
 
-	"github.com/Project-HAMi/HAMi/pkg/util"
-	"github.com/Project-HAMi/HAMi/pkg/util/nodelock"
-
-	spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/klog/v2"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/util"
+	"github.com/Project-HAMi/HAMi/pkg/util/nodelock"
 )
 
 const (
diff --git a/pkg/nvidia-plugin/api/config/v1/config.go b/pkg/nvidia-plugin/api/config/v1/config.go
new file mode 100644
index 000000000..5672340bc
--- /dev/null
+++ b/pkg/nvidia-plugin/api/config/v1/config.go
@@ -0,0 +1,160 @@
+/*
+* SPDX-License-Identifier: Apache-2.0
+*
+* The HAMi Contributors require contributions made to
+* this file be licensed under the Apache-2.0 license or a
+* compatible open source license.
+ */
+
+/*
+* Licensed to NVIDIA CORPORATION under one or more contributor
+* license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright
+* ownership. NVIDIA CORPORATION licenses this file to you under
+* the Apache License, Version 2.0 (the "License"); you may
+* not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+ */
+
+/*
+* Modifications Copyright The HAMi Authors. See
+* GitHub history for details.
+ */
+
+package v1
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	cli "github.com/urfave/cli/v2"
+
+	"sigs.k8s.io/yaml"
+)
+
+// Version indicates the version of the 'Config' struct used to hold configuration information.
+const Version = "v1"
+
+// Config is a versioned struct used to hold configuration information.
+type Config struct {
+	Version   string    `json:"version"             yaml:"version"`
+	Flags     Flags     `json:"flags,omitempty"     yaml:"flags,omitempty"`
+	Resources Resources `json:"resources,omitempty" yaml:"resources,omitempty"`
+	Sharing   Sharing   `json:"sharing,omitempty"   yaml:"sharing,omitempty"`
+	Imex      Imex      `json:"imex,omitempty"      yaml:"imex,omitempty"`
+}
+
+// NewConfig builds out a Config struct from a config file (or command line flags).
+// The data stored in the config will be populated in order of precedence from
+// (1) command line, (2) environment variable, (3) config file.
+func NewConfig(c *cli.Context, flags []cli.Flag) (*Config, error) {
+	config := &Config{Version: Version}
+
+	if configFile := c.String("config-file"); configFile != "" {
+		var err error
+		config, err = parseConfig(configFile)
+		if err != nil {
+			return nil, fmt.Errorf("unable to parse config file: %v", err)
+		}
+	}
+
+	config.Flags.UpdateFromCLIFlags(c, flags)
+	// TODO: This is currently not at the flags level?
+	// Does this mean that we should move UpdateFromCLIFlags to function off Config?
+	if c.IsSet("imex-channel-ids") {
+		config.Imex.ChannelIDs = c.IntSlice("imex-channel-ids")
+	}
+	if c.IsSet("imex-required") {
+		config.Imex.Required = c.Bool("imex-required")
+	}
+
+	// If nvidiaDevRoot (the path to the device nodes on the host) is not set,
+	// we default to using the driver root on the host.
+	if config.Flags.NvidiaDevRoot == nil || *config.Flags.NvidiaDevRoot == "" {
+		config.Flags.NvidiaDevRoot = config.Flags.NvidiaDriverRoot
+	}
+
+	// We explicitly set sharing.mps.failRequestsGreaterThanOne = true
+	// This can be relaxed in certain cases -- such as a single GPU -- but
+	// requires additional logic around when it's OK to combine requests and
+	// makes the semantics of a request unclear.
+	if config.Sharing.MPS != nil {
+		config.Sharing.MPS.FailRequestsGreaterThanOne = true
+	}
+
+	return config, nil
+}
+
+// logger is used to issue warnings in API functions without requiring an explicit implementation.
+type logger interface {
+	Warning(...interface{})
+	Warningf(string, ...interface{})
+}
+
+// DisableResourceNamingInConfig temporarily disables the resource renaming feature of the plugin.
+// This may be reenabled in a future release.
+func DisableResourceNamingInConfig(logger logger, config *Config) {
+	// Disable resource renaming through config.Resources
+	if len(config.Resources.GPUs) > 0 || len(config.Resources.MIGs) > 0 {
+		logger.Warning("Customizing the 'resources' field is not yet supported in the config. Ignoring...")
+	}
+	config.Resources.GPUs = nil
+	config.Resources.MIGs = nil
+
+	// Disable renaming / device selection in Sharing.TimeSlicing.Resources
+	config.Sharing.TimeSlicing.disableResoureRenaming(logger, "timeSlicing")
+	// Disable renaming / device selection in Sharing.MPS.Resources
+	config.Sharing.MPS.disableResoureRenaming(logger, "mps")
+}
+
+// parseConfig parses a config file as either YAML or JSON and unmarshals it into a Config struct.
+func parseConfig(configFile string) (*Config, error) {
+	reader, err := os.Open(configFile)
+	if err != nil {
+		return nil, fmt.Errorf("error opening config file: %v", err)
+	}
+	defer reader.Close()
+
+	config, err := parseConfigFrom(reader)
+	if err != nil {
+		return nil, fmt.Errorf("error parsing config file: %v", err)
+	}
+
+	return config, nil
+}
+
+func parseConfigFrom(reader io.Reader) (*Config, error) {
+	var err error
+	var configYaml []byte
+
+	configYaml, err = io.ReadAll(reader)
+	if err != nil {
+		return nil, fmt.Errorf("read error: %v", err)
+	}
+
+	var config Config
+	err = yaml.Unmarshal(configYaml, &config)
+	if err != nil {
+		return nil, fmt.Errorf("unmarshal error: %v", err)
+	}
+
+	if config.Version == "" {
+		config.Version = Version
+	}
+
+	if config.Version != Version {
+		return nil, fmt.Errorf("unknown version: %v", config.Version)
+	}
+
+	return &config, nil
+}
diff --git a/pkg/nvidia-plugin/api/config/v1/consts.go b/pkg/nvidia-plugin/api/config/v1/consts.go
new file mode 100644
index 000000000..eaafe7258
--- /dev/null
+++ b/pkg/nvidia-plugin/api/config/v1/consts.go
@@ -0,0 +1,72 @@
+/*
+* SPDX-License-Identifier: Apache-2.0
+*
+* The HAMi Contributors require contributions made to
+* this file be licensed under the Apache-2.0 license or a
+* compatible open source license.
+ */
+
+/*
+* Licensed to NVIDIA CORPORATION under one or more contributor
+* license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright
+* ownership. NVIDIA CORPORATION licenses this file to you under
+* the Apache License, Version 2.0 (the "License"); you may
+* not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+ */
+
+/*
+* Modifications Copyright The HAMi Authors. See
+* GitHub history for details.
+ */
+
+package v1
+
+import (
+	cdiapi "tags.cncf.io/container-device-interface/pkg/cdi"
+)
+
+// Constants related to resource names
+const (
+	ResourceNamePrefix              = "nvidia.com"
+	DefaultSharedResourceNameSuffix = ".shared"
+	MaxResourceNameLength           = 63
+)
+
+// Constants representing the various MIG strategies
+const (
+	MigStrategyNone   = "none"
+	MigStrategySingle = "single"
+	MigStrategyMixed  = "mixed"
+)
+
+// Constants to represent the various device list strategies
+const (
+	DeviceListStrategyEnvVar         = "envvar"
+	DeviceListStrategyVolumeMounts   = "volume-mounts"
+	DeviceListStrategyCDIAnnotations = "cdi-annotations"
+	DeviceListStrategyCDICRI         = "cdi-cri"
+)
+
+// Constants to represent the various device id strategies
+const (
+	DeviceIDStrategyUUID  = "uuid"
+	DeviceIDStrategyIndex = "index"
+)
+
+// Constants related to generating CDI specifications
+const (
+	DefaultCDIAnnotationPrefix = cdiapi.AnnotationPrefix
+	DefaultNvidiaCTKPath       = "/usr/bin/nvidia-ctk"
+	DefaultContainerDriverRoot = "/driver-root"
+)
diff --git a/pkg/nvidia-plugin/api/config/v1/duration.go b/pkg/nvidia-plugin/api/config/v1/duration.go
new file mode 100644
index 000000000..a3bd7f118
--- /dev/null
+++ b/pkg/nvidia-plugin/api/config/v1/duration.go
@@ -0,0 +1,69 @@
+/*
+* SPDX-License-Identifier: Apache-2.0
+*
+* The HAMi Contributors require contributions made to
+* this file be licensed under the Apache-2.0 license or a
+* compatible open source license.
+ */
+
+/*
+* Licensed to NVIDIA CORPORATION under one or more contributor
+* license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright
+* ownership. NVIDIA CORPORATION licenses this file to you under
+* the Apache License, Version 2.0 (the "License"); you may
+* not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+ */
+
+/*
+* Modifications Copyright The HAMi Authors. See
+* GitHub history for details.
+ */
+
+package v1
+
+import (
+	"encoding/json"
+	"fmt"
+	"time"
+)
+
+// Duration wraps a time.Duration with custom JSON marshaling/unmarshaling
+type Duration time.Duration
+
+// MarshalJSON marshals 'Duration' as a JSON string holding its time.Duration string form (e.g. "5s")
+func (d Duration) MarshalJSON() ([]byte, error) {
+	return json.Marshal(time.Duration(d).String())
+}
+
+// UnmarshalJSON unmarshals raw bytes into a 'Duration' type.
+func (d *Duration) UnmarshalJSON(b []byte) error {
+	var v interface{}
+	if err := json.Unmarshal(b, &v); err != nil {
+		return err
+	}
+	switch value := v.(type) {
+	case float64:
+		*d = Duration(time.Duration(value))
+		return nil
+	case string:
+		tmp, err := time.ParseDuration(value)
+		if err != nil {
+			return err
+		}
+		*d = Duration(tmp)
+		return nil
+	default:
+		return fmt.Errorf("invalid duration")
+	}
+}
diff --git a/pkg/nvidia-plugin/api/config/v1/flags.go b/pkg/nvidia-plugin/api/config/v1/flags.go
new file mode 100644
index 000000000..d26a96b78
--- /dev/null
+++ b/pkg/nvidia-plugin/api/config/v1/flags.go
@@ -0,0 +1,190 @@
+/*
+* SPDX-License-Identifier: Apache-2.0
+*
+* The HAMi Contributors require contributions made to
+* this file be licensed under the Apache-2.0 license or a
+* compatible open source license.
+ */
+
+/*
+* Licensed to NVIDIA CORPORATION under one or more contributor
+* license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright
+* ownership. NVIDIA CORPORATION licenses this file to you under
+* the Apache License, Version 2.0 (the "License"); you may
+* not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+ */
+
+/*
+* Modifications Copyright The HAMi Authors. See
+* GitHub history for details.
+ */
+
+package v1
+
+import (
+	"encoding/json"
+	"fmt"
+
+	cli "github.com/urfave/cli/v2"
+)
+
+// ptr returns a pointer to a copy of the value passed into it
+func ptr[T any](x T) *T {
+	return &x
+}
+
+// updateFromCLIFlag conditionally updates the config flag at 'pflag' to the value of the CLI flag with name 'flagName'
+func updateFromCLIFlag[T any](pflag **T, c *cli.Context, flagName string) {
+	if c.IsSet(flagName) || *pflag == (*T)(nil) {
+		switch flag := any(pflag).(type) {
+		case **string:
+			*flag = ptr(c.String(flagName))
+		case **[]string:
+			*flag = ptr(c.StringSlice(flagName))
+		case **bool:
+			*flag = ptr(c.Bool(flagName))
+		case **Duration:
+			*flag = ptr(Duration(c.Duration(flagName)))
+		case **deviceListStrategyFlag:
+			*flag = ptr((deviceListStrategyFlag)(c.StringSlice(flagName)))
+		default:
+			panic(fmt.Errorf("unsupported flag type for %v: %T", flagName, flag))
+		}
+	}
+}
+
+// Flags holds the full list of flags used to configure the device plugin and GFD.
+type Flags struct {
+	CommandLineFlags
+}
+
+// CommandLineFlags holds the list of command line flags used to configure the device plugin and GFD.
+type CommandLineFlags struct {
+	MigStrategy             *string                 `json:"migStrategy"                yaml:"migStrategy"`
+	FailOnInitError         *bool                   `json:"failOnInitError"            yaml:"failOnInitError"`
+	MpsRoot                 *string                 `json:"mpsRoot,omitempty"          yaml:"mpsRoot,omitempty"`
+	NvidiaDriverRoot        *string                 `json:"nvidiaDriverRoot,omitempty" yaml:"nvidiaDriverRoot,omitempty"`
+	NvidiaDevRoot           *string                 `json:"nvidiaDevRoot,omitempty"    yaml:"nvidiaDevRoot,omitempty"`
+	GDSEnabled              *bool                   `json:"gdsEnabled"                 yaml:"gdsEnabled"`
+	MOFEDEnabled            *bool                   `json:"mofedEnabled"               yaml:"mofedEnabled"`
+	UseNodeFeatureAPI       *bool                   `json:"useNodeFeatureAPI"          yaml:"useNodeFeatureAPI"`
+	DeviceDiscoveryStrategy *string                 `json:"deviceDiscoveryStrategy"    yaml:"deviceDiscoveryStrategy"`
+	Plugin                  *PluginCommandLineFlags `json:"plugin,omitempty"           yaml:"plugin,omitempty"`
+	GFD                     *GFDCommandLineFlags    `json:"gfd,omitempty"              yaml:"gfd,omitempty"`
+}
+
+// PluginCommandLineFlags holds the list of command line flags specific to the device plugin.
+type PluginCommandLineFlags struct {
+	PassDeviceSpecs     *bool                   `json:"passDeviceSpecs"     yaml:"passDeviceSpecs"`
+	DeviceListStrategy  *deviceListStrategyFlag `json:"deviceListStrategy"  yaml:"deviceListStrategy"`
+	DeviceIDStrategy    *string                 `json:"deviceIDStrategy"    yaml:"deviceIDStrategy"`
+	CDIAnnotationPrefix *string                 `json:"cdiAnnotationPrefix" yaml:"cdiAnnotationPrefix"`
+	NvidiaCTKPath       *string                 `json:"nvidiaCTKPath"       yaml:"nvidiaCTKPath"`
+	ContainerDriverRoot *string                 `json:"containerDriverRoot" yaml:"containerDriverRoot"`
+}
+
+// deviceListStrategyFlag is a custom type for parsing the deviceListStrategy flag.
+type deviceListStrategyFlag []string
+
+// UnmarshalJSON implements the custom unmarshaler for the deviceListStrategyFlag type.
+// Since this option allows a single string or a list of strings to be specified,
+// we need to handle both cases.
+func (f *deviceListStrategyFlag) UnmarshalJSON(b []byte) error {
+	var single string
+	err := json.Unmarshal(b, &single)
+	if err == nil {
+		*f = []string{single}
+		return nil
+	}
+
+	var multi []string
+	if err := json.Unmarshal(b, &multi); err == nil {
+		*f = multi
+		return nil
+	}
+
+	return fmt.Errorf("invalid deviceListStrategy: %v", string(b))
+}
+
+// GFDCommandLineFlags holds the list of command line flags specific to GFD.
+type GFDCommandLineFlags struct {
+	Oneshot         *bool     `json:"oneshot"         yaml:"oneshot"`
+	NoTimestamp     *bool     `json:"noTimestamp"     yaml:"noTimestamp"`
+	SleepInterval   *Duration `json:"sleepInterval"   yaml:"sleepInterval"`
+	OutputFile      *string   `json:"outputFile"      yaml:"outputFile"`
+	MachineTypeFile *string   `json:"machineTypeFile" yaml:"machineTypeFile"`
+}
+
+// UpdateFromCLIFlags updates Flags from settings in the cli Flags if they are set.
+func (f *Flags) UpdateFromCLIFlags(c *cli.Context, flags []cli.Flag) {
+	for _, flag := range flags {
+		for _, n := range flag.Names() {
+			// Common flags
+			switch n {
+			case "mig-strategy":
+				updateFromCLIFlag(&f.MigStrategy, c, n)
+			case "fail-on-init-error":
+				updateFromCLIFlag(&f.FailOnInitError, c, n)
+			case "mps-root":
+				updateFromCLIFlag(&f.MpsRoot, c, n)
+			case "driver-root", "nvidia-driver-root":
+				updateFromCLIFlag(&f.NvidiaDriverRoot, c, n)
+			case "dev-root", "nvidia-dev-root":
+				updateFromCLIFlag(&f.NvidiaDevRoot, c, n)
+			case "gds-enabled":
+				updateFromCLIFlag(&f.GDSEnabled, c, n)
+			case "mofed-enabled":
+				updateFromCLIFlag(&f.MOFEDEnabled, c, n)
+			case "use-node-feature-api":
+				updateFromCLIFlag(&f.UseNodeFeatureAPI, c, n)
+			case "device-discovery-strategy":
+				updateFromCLIFlag(&f.DeviceDiscoveryStrategy, c, n)
+			}
+			// Plugin specific flags
+			if f.Plugin == nil {
+				f.Plugin = &PluginCommandLineFlags{}
+			}
+			switch n {
+			case "pass-device-specs":
+				updateFromCLIFlag(&f.Plugin.PassDeviceSpecs, c, n)
+			case "device-list-strategy":
+				updateFromCLIFlag(&f.Plugin.DeviceListStrategy, c, n)
+			case "device-id-strategy":
+				updateFromCLIFlag(&f.Plugin.DeviceIDStrategy, c, n)
+			case "cdi-annotation-prefix":
+				updateFromCLIFlag(&f.Plugin.CDIAnnotationPrefix, c, n)
+			case "nvidia-cdi-hook-path", "nvidia-ctk-path":
+				updateFromCLIFlag(&f.Plugin.NvidiaCTKPath, c, n)
+			case "container-driver-root":
+				updateFromCLIFlag(&f.Plugin.ContainerDriverRoot, c, n)
+			}
+			// GFD specific flags
+			if f.GFD == nil {
+				f.GFD = &GFDCommandLineFlags{}
+			}
+			switch n {
+			case "oneshot":
+				updateFromCLIFlag(&f.GFD.Oneshot, c, n)
+			case "output-file":
+				updateFromCLIFlag(&f.GFD.OutputFile, c, n)
+			case "sleep-interval":
+				updateFromCLIFlag(&f.GFD.SleepInterval, c, n)
+			case "no-timestamp":
+				updateFromCLIFlag(&f.GFD.NoTimestamp, c, n)
+			case "machine-type-file":
+				updateFromCLIFlag(&f.GFD.MachineTypeFile, c, n)
+			}
+		}
+	}
+}
diff --git a/pkg/nvidia-plugin/api/config/v1/flags_test.go b/pkg/nvidia-plugin/api/config/v1/flags_test.go
new file mode 100644
index 000000000..8f4ac792c
--- /dev/null
+++ b/pkg/nvidia-plugin/api/config/v1/flags_test.go
@@ -0,0 +1,246 @@
+/*
+* SPDX-License-Identifier: Apache-2.0
+*
+* The HAMi Contributors require contributions made to
+* this file be licensed under the Apache-2.0 license or a
+* compatible open source license.
+ */
+
+/*
+* Licensed to NVIDIA CORPORATION under one or more contributor
+* license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright
+* ownership. NVIDIA CORPORATION licenses this file to you under
+* the Apache License, Version 2.0 (the "License"); you may
+* not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+ */
+
+/*
+* Modifications Copyright The HAMi Authors. See
+* GitHub history for details.
+ */
+
+package v1
+
+import (
+	"encoding/json"
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestUnmarshalFlags(t *testing.T) {
+	testCases := []struct {
+		input  string
+		output Flags
+		err    bool
+	}{
+		{
+			input: ``,
+			err:   true,
+		},
+		{
+			input:  `{}`,
+			output: Flags{},
+		},
+		{
+			input: `{
+				"gfd": {}
+			}`,
+			output: Flags{
+				CommandLineFlags{
+					GFD: &GFDCommandLineFlags{},
+				},
+			},
+		},
+		{
+			input: `{
+				"gfd": {
+					"sleepInterval": 0
+				}
+			}`,
+			output: Flags{
+				CommandLineFlags{
+					GFD: &GFDCommandLineFlags{
+						SleepInterval: ptr(Duration(0)),
+					},
+				},
+			},
+		},
+		{
+			input: `{
+				"gfd": {
+					"sleepInterval": "0s"
+				}
+			}`,
+			output: Flags{
+				CommandLineFlags{
+					GFD: &GFDCommandLineFlags{
+						SleepInterval: ptr(Duration(0)),
+					},
+				},
+			},
+		},
+		{
+			input: `{
+				"gfd": {
+					"sleepInterval": 5
+				}
+			}`,
+			output: Flags{
+				CommandLineFlags{
+					GFD: &GFDCommandLineFlags{
+						SleepInterval: ptr(Duration(5)),
+					},
+				},
+			},
+		},
+		{
+			input: `{
+				"gfd": {
+					"sleepInterval": "5s"
+				}
+			}`,
+			output: Flags{
+				CommandLineFlags{
+					GFD: &GFDCommandLineFlags{
+						SleepInterval: ptr(Duration(5 * time.Second)),
+					},
+				},
+			},
+		},
+		{
+			input: `{
+				"plugin": {
+					"deviceListStrategy": "envvar"
+				}
+			}`,
+			output: Flags{
+				CommandLineFlags{
+					Plugin: &PluginCommandLineFlags{
+						DeviceListStrategy: &deviceListStrategyFlag{"envvar"},
+					},
+				},
+			},
+		},
+		{
+			input: `{
+				"plugin": {
+					"deviceListStrategy": ["envvar", "cdi-annotations"]
+				}
+			}`,
+			output: Flags{
+				CommandLineFlags{
+					Plugin: &PluginCommandLineFlags{
+						DeviceListStrategy: &deviceListStrategyFlag{"envvar", "cdi-annotations"},
+					},
+				},
+			},
+		},
+	}
+
+	for i, tc := range testCases {
+		t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
+			var output Flags
+			err := json.Unmarshal([]byte(tc.input), &output)
+			if tc.err {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			require.Equal(t, tc.output, output)
+		})
+	}
+}
+
+func TestMarshalFlags(t *testing.T) {
+	testCases := []struct {
+		input  Flags
+		output string
+		err    bool
+	}{
+		{
+			input: Flags{},
+			output: `{
+				"migStrategy": null,
+				"failOnInitError": null,
+				"gdsEnabled": null,
+				"mofedEnabled": null,
+				"useNodeFeatureAPI": null,
+				"deviceDiscoveryStrategy": null
+			}`,
+		},
+		{
+			input: Flags{
+				CommandLineFlags{
+					GFD: &GFDCommandLineFlags{
+						SleepInterval: ptr(Duration(0)),
+					},
+				},
+			},
+			output: `{
+				"migStrategy": null,
+				"failOnInitError": null,
+				"gdsEnabled": null,
+				"mofedEnabled": null,
+				"useNodeFeatureAPI": null,
+				"deviceDiscoveryStrategy": null,
+				"gfd": {
+					"oneshot": null,
+					"noTimestamp": null,
+					"outputFile": null,
+					"sleepInterval": "0s",
+					"machineTypeFile": null
+				}
+			}`,
+		},
+		{
+			input: Flags{
+				CommandLineFlags{
+					GFD: &GFDCommandLineFlags{
+						SleepInterval: ptr(Duration(5)),
+					},
+				},
+			},
+			output: `{
+				"migStrategy": null,
+				"failOnInitError": null,
+				"gdsEnabled": null,
+				"mofedEnabled": null,
+				"useNodeFeatureAPI": null,
+				"deviceDiscoveryStrategy": null,
+				"gfd": {
+					"oneshot": null,
+					"noTimestamp": null,
+					"outputFile": null,
+					"sleepInterval": "5ns",
+					"machineTypeFile": null
+				}
+			}`,
+		},
+	}
+
+	for i, tc := range testCases {
+		t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
+			output, err := json.Marshal(tc.input)
+			if tc.err {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			require.JSONEq(t, tc.output, string(output))
+		})
+	}
+}
diff --git a/pkg/nvidia-plugin/api/config/v1/imex.go b/pkg/nvidia-plugin/api/config/v1/imex.go
new file mode 100644
index 000000000..928e13e85
--- /dev/null
+++ b/pkg/nvidia-plugin/api/config/v1/imex.go
@@ -0,0 +1,53 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package v1
+
+import (
+	"errors"
+	"fmt"
+)
+
+const (
+	ImexChannelEnvVar = "NVIDIA_IMEX_CHANNELS"
+)
+
+var errInvalidImexConfig = errors.New("invalid IMEX config")
+
+// Imex stores the configuration options for fabric-attached devices.
+type Imex struct {
+	// ChannelIDs defines a list of channel IDs to inject into containers that request NVIDIA devices.
+	// If a channel ID is specified and the associated channel device node exists, the corresponding
+	// channel will be added to the ContainerAllocateResponse for containers with access to NVIDIA
+	// devices.
+	ChannelIDs []int `json:"channelIDs,omitempty" yaml:"channelIDs,omitempty"`
+	// Required specifies whether the requested IMEX channel IDs are required or not.
+	// If a channel is required, it is expected to exist as the device plugin starts.
+	// If it is not required its injection is skipped if the device nodes do not exist or if its
+	// existence cannot be queried.
+	Required bool `json:"required,omitempty" yaml:"required,omitempty"`
+}
+
+// AssertChannelIDsValid checks whether the specified list of channel IDs is valid (empty, or exactly [0]).
+func AssertChannelIDsValid(ids []int) error {
+	switch {
+	case len(ids) == 0:
+		return nil
+	case len(ids) == 1 && ids[0] == 0:
+		return nil
+	}
+	return fmt.Errorf("%w: channelIDs must be [] or [0]; found %v", errInvalidImexConfig, ids)
+}
diff --git a/pkg/nvidia-plugin/api/config/v1/imex_test.go b/pkg/nvidia-plugin/api/config/v1/imex_test.go
new file mode 100644
index 000000000..7a0c72e3a
--- /dev/null
+++ b/pkg/nvidia-plugin/api/config/v1/imex_test.go
@@ -0,0 +1,83 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package v1
+
+import (
+	"encoding/json"
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestImexUnmarshal(t *testing.T) {
+	testCases := []struct {
+		description   string
+		input         string
+		expected      Imex
+		expectedError error
+	}{
+		{
+			description: "empty json",
+			input:       "{}",
+			expected:    Imex{},
+		},
+		{
+			description: "null channel ID is valid",
+			input:       `{"channelIDs": null}`,
+			expected:    Imex{},
+		},
+		{
+			description: "empty channel ID is valid",
+			input:       `{"channelIDs": []}`,
+			expected: Imex{
+				ChannelIDs: []int{},
+			},
+		},
+		{
+			description: "single 0 channel ID is valid",
+			input:       `{"channelIDs": [0]}`,
+			expected: Imex{
+				ChannelIDs: []int{0},
+			},
+		},
+		{
+			description: "single 0 channel ID as int is valid",
+			input:       `{"channelIDs": [0]}`,
+			expected: Imex{
+				ChannelIDs: []int{0},
+			},
+		},
+		{
+			description: "invalid cases",
+			input:       `{"channelIDs": [2]}`,
+			expected: Imex{
+				ChannelIDs: []int{2},
+			},
+			expectedError: errInvalidImexConfig,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			var output Imex
+			err := json.Unmarshal([]byte(tc.input), &output)
+			require.ErrorIs(t, errors.Join(err, AssertChannelIDsValid(output.ChannelIDs)), tc.expectedError)
+			require.Equal(t, tc.expected, output)
+		})
+	}
+}
diff --git a/pkg/nvidia-plugin/api/config/v1/replicas.go b/pkg/nvidia-plugin/api/config/v1/replicas.go
new file mode 100644
index 000000000..f2448a195
--- /dev/null
+++ b/pkg/nvidia-plugin/api/config/v1/replicas.go
@@ -0,0 +1,355 @@
+/*
+* SPDX-License-Identifier: Apache-2.0
+*
+* The HAMi Contributors require contributions made to
+* this file be licensed under the Apache-2.0 license or a
+* compatible open source license.
+ */
+
+/*
+* Licensed to NVIDIA CORPORATION under one or more contributor
+* license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright
+* ownership. NVIDIA CORPORATION licenses this file to you under
+* the Apache License, Version 2.0 (the "License"); you may
+* not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+ */
+
+/*
+* Modifications Copyright The HAMi Authors. See
+* GitHub history for details.
+ */
+
+package v1
+
+import (
+	"encoding/json"
+	"fmt"
+	"strconv"
+	"strings"
+
+	"github.com/google/uuid"
+)
+
+// ReplicatedResources defines generic options for replicating devices.
+type ReplicatedResources struct {
+	RenameByDefault            bool                 `json:"renameByDefault,omitempty"            yaml:"renameByDefault,omitempty"`
+	FailRequestsGreaterThanOne bool                 `json:"failRequestsGreaterThanOne,omitempty" yaml:"failRequestsGreaterThanOne,omitempty"`
+	Resources                  []ReplicatedResource `json:"resources,omitempty"                  yaml:"resources,omitempty"`
+}
+
+func (rrs *ReplicatedResources) disableResoureRenaming(logger logger, id string) {
+	if rrs == nil {
+		return
+	}
+	renameByDefault := rrs.RenameByDefault
+	setsNonDefaultRename := false
+	setsDevices := false
+	for i, r := range rrs.Resources {
+		if !renameByDefault && r.Rename != "" {
+			setsNonDefaultRename = true
+			rrs.Resources[i].Rename = ""
+		}
+		if renameByDefault && r.Rename != r.Name.DefaultSharedRename() {
+			setsNonDefaultRename = true
+			rrs.Resources[i].Rename = r.Name.DefaultSharedRename()
+		}
+		if !r.Devices.All {
+			setsDevices = true
+			rrs.Resources[i].Devices.All = true
+			rrs.Resources[i].Devices.Count = 0
+			rrs.Resources[i].Devices.List = nil
+		}
+	}
+	if setsNonDefaultRename {
+		logger.Warningf("Setting the 'rename' field in sharing.%s.resources is not yet supported in the config. Ignoring...", id)
+	}
+	if setsDevices {
+		logger.Warningf("Customizing the 'devices' field in sharing.%s.resources is not yet supported in the config. Ignoring...", id)
+	}
+
+}
+
+func (rrs *ReplicatedResources) isReplicated() bool {
+	if rrs == nil {
+		return false
+	}
+	for _, rr := range rrs.Resources {
+		if rr.Replicas > 1 {
+			return true
+		}
+	}
+	return false
+}
+
+// ReplicatedResource represents a resource to be replicated.
+type ReplicatedResource struct {
+	Name     ResourceName      `json:"name"             yaml:"name"`
+	Rename   ResourceName      `json:"rename,omitempty" yaml:"rename,omitempty"`
+	Devices  ReplicatedDevices `json:"devices"          yaml:"devices,flow"`
+	Replicas int               `json:"replicas"         yaml:"replicas"`
+}
+
+// ReplicatedDevices encapsulates the set of devices that should be replicated for a given resource.
+// This struct should be treated as a 'union' and only one of the fields in this struct should be set at any given time.
+type ReplicatedDevices struct {
+	All   bool
+	Count int
+	List  []ReplicatedDeviceRef
+}
+
+// ReplicatedDeviceRef can be a full GPU index, a MIG index, or a UUID (full GPU or MIG).
+type ReplicatedDeviceRef string
+
+// IsGPUIndex checks if a ReplicatedDeviceRef is a full GPU index
+func (d ReplicatedDeviceRef) IsGPUIndex() bool {
+	if _, err := strconv.ParseUint(string(d), 10, 0); err != nil {
+		return false
+	}
+	return true
+}
+
+// IsMigIndex checks if a ReplicatedDeviceRef is a MIG index
+func (d ReplicatedDeviceRef) IsMigIndex() bool {
+	split := strings.SplitN(string(d), ":", 2)
+	if len(split) != 2 {
+		return false
+	}
+	for _, s := range split {
+		if _, err := strconv.ParseUint(s, 10, 0); err != nil {
+			return false
+		}
+	}
+	return true
+}
+
+// IsUUID checks if a ReplicatedDeviceRef is a UUID
+func (d ReplicatedDeviceRef) IsUUID() bool {
+	return d.IsGpuUUID() || d.IsMigUUID()
+}
+
+// IsGpuUUID checks if a ReplicatedDeviceRef is a GPU UUID
+// A GPU UUID must be of the form GPU-b1028956-cfa2-0990-bf4a-5da9abb51763
+func (d ReplicatedDeviceRef) IsGpuUUID() bool {
+	if !strings.HasPrefix(string(d), "GPU-") {
+		return false
+	}
+	_, err := uuid.Parse(strings.TrimPrefix(string(d), "GPU-"))
+	return err == nil
+}
+
+// IsMigUUID checks if a ReplicatedDeviceRef is a MIG UUID
+// A MIG UUID can be of one of two forms:
+//   - MIG-b1028956-cfa2-0990-bf4a-5da9abb51763
+//   - MIG-GPU-b1028956-cfa2-0990-bf4a-5da9abb51763/3/0
+func (d ReplicatedDeviceRef) IsMigUUID() bool {
+	if !strings.HasPrefix(string(d), "MIG-") {
+		return false
+	}
+	suffix := strings.TrimPrefix(string(d), "MIG-")
+	_, err := uuid.Parse(suffix)
+	if err == nil {
+		return true
+	}
+	split := strings.SplitN(suffix, "/", 3)
+	if len(split) != 3 {
+		return false
+	}
+	if !ReplicatedDeviceRef(split[0]).IsGpuUUID() {
+		return false
+	}
+	for _, s := range split[1:] {
+		_, err := strconv.ParseUint(s, 10, 0)
+		if err != nil {
+			return false
+		}
+	}
+	return true
+}
+
+// UnmarshalJSON unmarshals raw bytes into a 'ReplicatedResources' struct.
+func (s *ReplicatedResources) UnmarshalJSON(b []byte) error {
+	ts := make(map[string]json.RawMessage)
+	err := json.Unmarshal(b, &ts)
+	if err != nil {
+		return err
+	}
+
+	renameByDefault, exists := ts["renameByDefault"]
+	if !exists {
+		renameByDefault = []byte(`false`)
+	}
+
+	err = json.Unmarshal(renameByDefault, &s.RenameByDefault)
+	if err != nil {
+		return err
+	}
+
+	failRequestsGreaterThanOne, exists := ts["failRequestsGreaterThanOne"]
+	if !exists {
+		failRequestsGreaterThanOne = []byte(`false`)
+	}
+
+	err = json.Unmarshal(failRequestsGreaterThanOne, &s.FailRequestsGreaterThanOne)
+	if err != nil {
+		return err
+	}
+
+	resources, exists := ts["resources"]
+	if !exists {
+		return fmt.Errorf("no resources specified")
+	}
+
+	err = json.Unmarshal(resources, &s.Resources)
+	if err != nil {
+		return err
+	}
+
+	if len(s.Resources) == 0 {
+		return fmt.Errorf("no resources specified")
+	}
+
+	for i, r := range s.Resources {
+		if s.RenameByDefault && r.Rename == "" {
+			s.Resources[i].Rename = r.Name.DefaultSharedRename()
+		}
+	}
+
+	return nil
+}
+
+// UnmarshalJSON unmarshals raw bytes into a 'ReplicatedResource' struct.
+func (s *ReplicatedResource) UnmarshalJSON(b []byte) error {
+	rr := make(map[string]json.RawMessage)
+	err := json.Unmarshal(b, &rr)
+	if err != nil {
+		return err
+	}
+
+	name, exists := rr["name"]
+	if !exists {
+		return fmt.Errorf("no resource name specified")
+	}
+
+	err = json.Unmarshal(name, &s.Name)
+	if err != nil {
+		return err
+	}
+
+	devices, exists := rr["devices"]
+	if !exists {
+		devices = []byte(`"all"`)
+	}
+
+	err = json.Unmarshal(devices, &s.Devices)
+	if err != nil {
+		return err
+	}
+
+	replicas, exists := rr["replicas"]
+	if !exists {
+		return fmt.Errorf("no replicas specified")
+	}
+
+	err = json.Unmarshal(replicas, &s.Replicas)
+	if err != nil {
+		return err
+	}
+
+	if s.Replicas < 2 {
+		return fmt.Errorf("number of replicas must be >= 2")
+	}
+
+	rename, exists := rr["rename"]
+	if !exists {
+		return nil
+	}
+
+	err = json.Unmarshal(rename, &s.Rename)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// UnmarshalJSON unmarshals raw bytes into a 'ReplicatedDevices' struct.
+func (s *ReplicatedDevices) UnmarshalJSON(b []byte) error {
+	// Match the string 'all'
+	var str string
+	err := json.Unmarshal(b, &str)
+	if err == nil {
+		if str != "all" {
+			return fmt.Errorf("devices set as '%v' but the only valid string input is 'all'", str)
+		}
+		s.All = true
+		return nil
+	}
+
+	// Match a count
+	var count int
+	err = json.Unmarshal(b, &count)
+	if err == nil {
+		if count <= 0 {
+			return fmt.Errorf("devices set as '%v' but a count of devices must be > 0", count)
+		}
+		s.Count = count
+		return nil
+	}
+
+	// Match a list
+	var slice []json.RawMessage
+	err = json.Unmarshal(b, &slice)
+	if err == nil {
+		// For each item in the list check its format and convert it to a string (if necessary)
+		result := make([]ReplicatedDeviceRef, len(slice))
+		for i, s := range slice {
+			// Match a uint as a GPU index and convert it to a string
+			var index uint64
+			if err = json.Unmarshal(s, &index); err == nil {
+				result[i] = ReplicatedDeviceRef(strconv.FormatUint(index, 10))
+				continue
+			}
+			// Match strings as valid entries if they are GPU indices, MIG indices, or UUIDs
+			var item string
+			if err = json.Unmarshal(s, &item); err == nil {
+				rd := ReplicatedDeviceRef(item)
+				if rd.IsGPUIndex() || rd.IsMigIndex() || rd.IsUUID() {
+					result[i] = rd
+					continue
+				}
+			}
+			// Treat any other entries as errors
+			return fmt.Errorf("unsupported type for device in devices list: %v, %T", item, item)
+		}
+		s.List = result
+		return nil
+	}
+
+	// No matches found
+	return fmt.Errorf("unrecognized type for devices spec: %v", string(b))
+}
+
+// MarshalJSON marshals ReplicatedDevices to its raw bytes representation
+func (s *ReplicatedDevices) MarshalJSON() ([]byte, error) {
+	if s.All {
+		return json.Marshal("all")
+	}
+	if s.Count > 0 {
+		return json.Marshal(s.Count)
+	}
+	if s.List != nil {
+		return json.Marshal(s.List)
+	}
+	return nil, fmt.Errorf("unmarshallable ReplicatedDevices struct: %v", s)
+}
diff --git a/pkg/nvidia-plugin/api/config/v1/replicas_test.go b/pkg/nvidia-plugin/api/config/v1/replicas_test.go
new file mode 100644
index 000000000..7392cb385
--- /dev/null
+++ b/pkg/nvidia-plugin/api/config/v1/replicas_test.go
@@ -0,0 +1,482 @@
+/*
+* SPDX-License-Identifier: Apache-2.0
+*
+* The HAMi Contributors require contributions made to
+* this file be licensed under the Apache-2.0 license or a
+* compatible open source license.
+ */
+
+/*
+* Licensed to NVIDIA CORPORATION under one or more contributor
+* license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright
+* ownership. NVIDIA CORPORATION licenses this file to you under
+* the Apache License, Version 2.0 (the "License"); you may
+* not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+ */
+
+/*
+* Modifications Copyright The HAMi Authors. See
+* GitHub history for details.
+ */
+
+package v1
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func NoErrorNewResourceName(n string) ResourceName {
+	rn, _ := NewResourceName(n)
+	return rn
+}
+
+func TestReplicatedDeviceRef(t *testing.T) {
+	testCases := []struct {
+		input    string
+		expected string
+	}{
+		{
+			input:    "0",
+			expected: "gpuIndex",
+		},
+		{
+			input:    "0:0",
+			expected: "migIndex",
+		},
+		{
+			input:    "GPU-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c",
+			expected: "uuid",
+		},
+		{
+			input:    "MIG-3eb87630-93d5-b2b6-b8ff-9b359caf4ee2",
+			expected: "uuid",
+		},
+		{
+			input:    "MIG-GPU-662077db-fa3f-0d8f-9502-21ab0ef058a2/10/0",
+			expected: "uuid",
+		},
+	}
+
+	for i, tc := range testCases {
+		t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
+			switch tc.expected {
+			case "gpuIndex":
+				require.True(t, ReplicatedDeviceRef(tc.input).IsGPUIndex())
+				require.False(t, ReplicatedDeviceRef(tc.input).IsMigIndex())
+				require.False(t, ReplicatedDeviceRef(tc.input).IsUUID())
+			case "migIndex":
+				require.False(t, ReplicatedDeviceRef(tc.input).IsGPUIndex())
+				require.True(t, ReplicatedDeviceRef(tc.input).IsMigIndex())
+				require.False(t, ReplicatedDeviceRef(tc.input).IsUUID())
+			case "uuid":
+				require.False(t, ReplicatedDeviceRef(tc.input).IsGPUIndex())
+				require.False(t, ReplicatedDeviceRef(tc.input).IsMigIndex())
+				require.True(t, ReplicatedDeviceRef(tc.input).IsUUID())
+			}
+		})
+	}
+}
+
+func TestMarshalReplicatedDevices(t *testing.T) {
+	testCases := []struct {
+		input  ReplicatedDevices
+		output string
+		err    bool
+	}{
+		{
+			input: ReplicatedDevices{},
+			err:   true,
+		},
+		{
+			input: ReplicatedDevices{
+				All: true,
+			},
+			output: `"all"`,
+		},
+		{
+			input: ReplicatedDevices{
+				Count: 2,
+			},
+			output: `2`,
+		},
+		{
+			input: ReplicatedDevices{
+				List: []ReplicatedDeviceRef{"0", "0:0", "GPU-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c"},
+			},
+			output: `["0", "0:0", "GPU-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c"]`,
+		},
+	}
+
+	for i, tc := range testCases {
+		t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
+			output, err := tc.input.MarshalJSON()
+			if tc.err {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			require.JSONEq(t, tc.output, string(output))
+		})
+	}
+}
+
+func TestUnmarshalReplicatedDevices(t *testing.T) {
+	testCases := []struct {
+		input  string
+		output ReplicatedDevices
+		err    bool
+	}{
+		{
+			input: ``,
+			err:   true,
+		},
+		{
+			input: `"not-all"`,
+			err:   true,
+		},
+		{
+			input: `-2`,
+			err:   true,
+		},
+		{
+			input: `2.0`,
+			err:   true,
+		},
+		{
+			input: `[-1]`,
+			err:   true,
+		},
+		{
+			input: `["-1"]`,
+			err:   true,
+		},
+		{
+			input: `["invalid-UUID"]`,
+			err:   true,
+		},
+		{
+			input: `["GPU-UUID"]`,
+			err:   true,
+		},
+		{
+			input: `["MIG-UUID"]`,
+			err:   true,
+		},
+		{
+			input: `["MIG-GPU-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c"]`,
+			err:   true,
+		},
+		{
+			input: `"all"`,
+			output: ReplicatedDevices{
+				All: true,
+			},
+		},
+		{
+			input: `2`,
+			output: ReplicatedDevices{
+				Count: 2,
+			},
+		},
+		{
+			input: `[0]`,
+			output: ReplicatedDevices{
+				List: []ReplicatedDeviceRef{"0"},
+			},
+		},
+		{
+			input: `["0"]`,
+			output: ReplicatedDevices{
+				List: []ReplicatedDeviceRef{"0"},
+			},
+		},
+		{
+			input: `["0:0"]`,
+			output: ReplicatedDevices{
+				List: []ReplicatedDeviceRef{"0:0"},
+			},
+		},
+		{
+			input: `["GPU-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c"]`,
+			output: ReplicatedDevices{
+				List: []ReplicatedDeviceRef{"GPU-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c"},
+			},
+		},
+		{
+			input: `["MIG-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c"]`,
+			output: ReplicatedDevices{
+				List: []ReplicatedDeviceRef{"MIG-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c"},
+			},
+		},
+		{
+			input: `["MIG-GPU-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c/0/0"]`,
+			output: ReplicatedDevices{
+				List: []ReplicatedDeviceRef{"MIG-GPU-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c/0/0"},
+			},
+		},
+		{
+			input: `[0, "0:0", "GPU-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c"]`,
+			output: ReplicatedDevices{
+				List: []ReplicatedDeviceRef{"0", "0:0", "GPU-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c"},
+			},
+		},
+		{
+			input: `["0", "0:0", "GPU-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c"]`,
+			output: ReplicatedDevices{
+				List: []ReplicatedDeviceRef{"0", "0:0", "GPU-4cf8db2d-06c0-7d70-1a51-e59b25b2c16c"},
+			},
+		},
+	}
+
+	for i, tc := range testCases {
+		t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
+			var output ReplicatedDevices
+			err := output.UnmarshalJSON([]byte(tc.input))
+			if tc.err {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			require.Equal(t, tc.output, output)
+		})
+	}
+}
+
+func TestUnmarshalReplicatedResource(t *testing.T) {
+	testCases := []struct {
+		input  string
+		output ReplicatedResource
+		err    bool
+	}{
+		{
+			input: ``,
+			err:   true,
+		},
+		{
+			input: `{}`,
+			err:   true,
+		},
+		{
+			input: `{
+				"name": "valid",
+			}`,
+			err: true,
+		},
+		{
+			input: `{
+				"name": "valid",
+				"devices": "all",
+			}`,
+			err: true,
+		},
+		{
+			input: `{
+				"name": "valid",
+				"devices": "all",
+				"rename": "valid-shared",
+			}`,
+			err: true,
+		},
+		{
+			input: `{
+				"name": "valid",
+				"devices": "all",
+				"replicas": 2
+			}`,
+			output: ReplicatedResource{
+				Name:     NoErrorNewResourceName("valid"),
+				Devices:  ReplicatedDevices{All: true},
+				Replicas: 2,
+			},
+		},
+		{
+			input: `{
+				"name": "valid",
+				"devices": "all",
+				"replicas": 2,
+				"rename": "valid-shared"
+			}`,
+			output: ReplicatedResource{
+				Name:     NoErrorNewResourceName("valid"),
+				Devices:  ReplicatedDevices{All: true},
+				Replicas: 2,
+				Rename:   NoErrorNewResourceName("valid-shared"),
+			},
+		},
+		{
+			input: `{
+				"name": "valid",
+				"replicas": -1,
+			}`,
+			err: true,
+		},
+		{
+			input: `{
+				"name": "valid",
+				"replicas": 0,
+			}`,
+			err: true,
+		},
+		{
+			input: `{
+				"name": "valid",
+				"replicas": 2
+			}`,
+			output: ReplicatedResource{
+				Name:     NoErrorNewResourceName("valid"),
+				Devices:  ReplicatedDevices{All: true},
+				Replicas: 2,
+			},
+		},
+		{
+			input: `{
+				"name": "valid",
+				"replicas": 2,
+				"rename": "valid-shared"
+			}`,
+			output: ReplicatedResource{
+				Name:     NoErrorNewResourceName("valid"),
+				Devices:  ReplicatedDevices{All: true},
+				Replicas: 2,
+				Rename:   NoErrorNewResourceName("valid-shared"),
+			},
+		},
+		{
+			input: `{
+				"name": "$invalid$",
+				"replicas": 2,
+				"rename": "valid-shared"
+			}`,
+			err: true,
+		},
+		{
+			input: `{
+				"name": "valid",
+				"replicas": 2,
+				"rename": "$invalid$"
+			}`,
+			err: true,
+		},
+	}
+
+	for i, tc := range testCases {
+		t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
+			var output ReplicatedResource
+			err := output.UnmarshalJSON([]byte(tc.input))
+			if tc.err {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			require.Equal(t, tc.output, output)
+		})
+	}
+}
+
+func TestUnmarshalReplicatedResources(t *testing.T) {
+	testCases := []struct {
+		input  string
+		output ReplicatedResources
+		err    bool
+	}{
+		{
+			input: ``,
+			err:   true,
+		},
+		{
+			input: `{}`,
+			err:   true,
+		},
+		{
+			input: `{
+				"resources": []
+			}`,
+			err: true,
+		},
+		{
+			input: `{
+				"resources": [
+					{
+						"name": "valid",
+						"replicas": 2
+					}
+				]
+			}`,
+			output: ReplicatedResources{
+				Resources: []ReplicatedResource{
+					{
+						Name:     NoErrorNewResourceName("valid"),
+						Devices:  ReplicatedDevices{All: true},
+						Replicas: 2,
+					},
+				},
+			},
+		},
+		{
+			input: `{
+				"resources": [
+					{
+						"name": "valid1",
+						"replicas": 2
+					},
+					{
+						"name": "valid2",
+						"replicas": 2
+					}
+				]
+			}`,
+			output: ReplicatedResources{
+				Resources: []ReplicatedResource{
+					{
+						Name:     NoErrorNewResourceName("valid1"),
+						Devices:  ReplicatedDevices{All: true},
+						Replicas: 2,
+					},
+					{
+						Name:     NoErrorNewResourceName("valid2"),
+						Devices:  ReplicatedDevices{All: true},
+						Replicas: 2,
+					},
+				},
+			},
+		},
+		{
+			input: `{
+				"resources": [
+					{
+						"name": "$invalid$",
+						"replicas": 2
+					}
+				]
+			}`,
+			err: true,
+		},
+	}
+
+	for i, tc := range testCases {
+		t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
+			var output ReplicatedResources
+			err := output.UnmarshalJSON([]byte(tc.input))
+			if tc.err {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			require.Equal(t, tc.output, output)
+		})
+	}
+}
diff --git a/pkg/nvidia-plugin/api/config/v1/resources.go b/pkg/nvidia-plugin/api/config/v1/resources.go
new file mode 100644
index 000000000..1edb98afc
--- /dev/null
+++ b/pkg/nvidia-plugin/api/config/v1/resources.go
@@ -0,0 +1,196 @@
+/*
+* SPDX-License-Identifier: Apache-2.0
+*
+* The HAMi Contributors require contributions made to
+* this file be licensed under the Apache-2.0 license or a
+* compatible open source license.
+ */
+
+/*
+* Licensed to NVIDIA CORPORATION under one or more contributor
+* license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright
+* ownership. NVIDIA CORPORATION licenses this file to you under
+* the Apache License, Version 2.0 (the "License"); you may
+* not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+ */
+
+/*
+* Modifications Copyright The HAMi Authors. See
+* GitHub history for details.
+ */
+
+package v1
+
+import (
+	"encoding/json"
+	"fmt"
+	"regexp"
+	"strings"
+
+	k8s "k8s.io/apimachinery/pkg/api/validation"
+)
+
+// ResourcePattern is used to match a resource name to a specific pattern
+type ResourcePattern string
+
+// ResourceName represents a valid resource name in Kubernetes
+type ResourceName string
+
+// Resource pairs a pattern matcher with a resource name.
+type Resource struct {
+	Pattern ResourcePattern `json:"pattern" yaml:"pattern"`
+	Name    ResourceName    `json:"name"    yaml:"name"`
+}
+
+// Resources lists full GPUs and MIG devices separately.
+type Resources struct {
+	GPUs []Resource `json:"gpus"           yaml:"gpus"`
+	MIGs []Resource `json:"mig,omitempty"  yaml:"mig,omitempty"`
+}
+
+// NewResourceName builds a resource name from the standard prefix and a name.
+// An error is returned if the format is incorrect.
+func NewResourceName(n string) (ResourceName, error) {
+	if !strings.HasPrefix(n, ResourceNamePrefix+"/") {
+		n = ResourceNamePrefix + "/" + n
+	}
+
+	if len(n) > MaxResourceNameLength {
+		return "", fmt.Errorf("fully-qualified resource name must be %v characters or less: %v", MaxResourceNameLength, n)
+	}
+
+	_, name := ResourceName(n).Split()
+	invalid := k8s.NameIsDNSSubdomain(name, false)
+	if len(invalid) != 0 {
+		return "", fmt.Errorf("incorrect format for resource name '%v': %v", n, invalid)
+	}
+
+	return ResourceName(n), nil
+}
+
+// NewResource builds a resource from a name and pattern
+func NewResource(pattern, name string) (*Resource, error) {
+	resourceName, err := NewResourceName(name)
+	if err != nil {
+		return nil, fmt.Errorf("invalid resource name: %v", err)
+	}
+	r := &Resource{
+		Pattern: ResourcePattern(pattern),
+		Name:    resourceName,
+	}
+	return r, nil
+}
+
+// Split splits a full resource name into prefix and name
+func (r ResourceName) Split() (string, string) {
+	split := strings.SplitN(string(r), "/", 2)
+	if len(split) != 2 {
+		return "", string(r)
+	}
+	return split[0], split[1]
+}
+
+// DefaultSharedRename returns the default renaming to apply when this resource is shared
+func (r ResourceName) DefaultSharedRename() ResourceName {
+	return r + DefaultSharedResourceNameSuffix
+}
+
+// UnmarshalJSON unmarshals raw bytes into a 'Resource' struct.
+func (r *Resource) UnmarshalJSON(b []byte) error {
+	res := make(map[string]json.RawMessage)
+	err := json.Unmarshal(b, &res)
+	if err != nil {
+		return err
+	}
+
+	// Verify both fields set in the resource JSON
+	pattern, patternExists := res["pattern"]
+	name, nameExists := res["name"]
+	if !patternExists {
+		return fmt.Errorf("resources must have a 'pattern' field set")
+	}
+	if !nameExists {
+		return fmt.Errorf("resources must have a 'name' field set")
+	}
+
+	// Set r.Pattern from the resource JSON
+	err = json.Unmarshal(pattern, &r.Pattern)
+	if err != nil {
+		return err
+	}
+
+	// Set r.Name from the resource JSON
+	err = json.Unmarshal(name, &r.Name)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// UnmarshalJSON unmarshals raw bytes into a 'ResourceName' type.
+func (r *ResourceName) UnmarshalJSON(b []byte) error {
+	var raw string
+	err := json.Unmarshal(b, &raw)
+	if err != nil {
+		return err
+	}
+
+	*r, err = NewResourceName(raw)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// AddGPUResource adds a GPU resource to the list of GPU resources.
+func (r *Resources) AddGPUResource(pattern, name string) error {
+	resource, err := NewResource(pattern, name)
+	if err != nil {
+		return err
+	}
+	r.GPUs = append(r.GPUs, *resource)
+	return nil
+}
+
+// AddMIGResource adds a MIG resource to the list of MIG resources.
+func (r *Resources) AddMIGResource(pattern, name string) error {
+	resource, err := NewResource(pattern, name)
+	if err != nil {
+		return err
+	}
+	r.MIGs = append(r.MIGs, *resource)
+	return nil
+}
+
+// Matches checks if the provided string matches the ResourcePattern or not.
+func (p ResourcePattern) Matches(s string) bool {
+	result, _ := regexp.MatchString(wildCardToRegexp(string(p)), s)
+	return result
+}
+
+// wildCardToRegexp converts a wildcard pattern to a regular expression pattern.
+func wildCardToRegexp(pattern string) string {
+	var result strings.Builder
+	for i, literal := range strings.Split(pattern, "*") {
+		// Replace * with .*
+		if i > 0 {
+			result.WriteString(".*")
+		}
+		// Quote any regular expression meta characters in the literal text.
+		result.WriteString(regexp.QuoteMeta(literal))
+	}
+	return result.String()
+}
diff --git a/pkg/nvidia-plugin/api/config/v1/sharing.go b/pkg/nvidia-plugin/api/config/v1/sharing.go
new file mode 100644
index 000000000..e7b3b9af0
--- /dev/null
+++ b/pkg/nvidia-plugin/api/config/v1/sharing.go
@@ -0,0 +1,69 @@
+/*
+* SPDX-License-Identifier: Apache-2.0
+*
+* The HAMi Contributors require contributions made to
+* this file be licensed under the Apache-2.0 license or a
+* compatible open source license.
+ */
+
+/*
+* Licensed to NVIDIA CORPORATION under one or more contributor
+* license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright
+* ownership. NVIDIA CORPORATION licenses this file to you under
+* the Apache License, Version 2.0 (the "License"); you may
+* not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+ */
+
+/*
+* Modifications Copyright The HAMi Authors. See
+* GitHub history for details.
+ */
+
+package v1
+
+// Sharing encapsulates the set of sharing strategies that are supported.
+type Sharing struct {
+	// TimeSlicing defines the set of replicas to be made for time-slicing available resources.
+	TimeSlicing ReplicatedResources `json:"timeSlicing,omitempty" yaml:"timeSlicing,omitempty"`
+	// MPS defines the set of replicas to be shared using MPS
+	MPS *ReplicatedResources `json:"mps,omitempty"         yaml:"mps,omitempty"`
+}
+
+type SharingStrategy string
+
+const (
+	SharingStrategyMPS         = SharingStrategy("mps")
+	SharingStrategyNone        = SharingStrategy("none")
+	SharingStrategyTimeSlicing = SharingStrategy("time-slicing")
+)
+
+// SharingStrategy returns the active sharing strategy.
+func (s *Sharing) SharingStrategy() SharingStrategy {
+	if s.MPS != nil && s.MPS.isReplicated() {
+		return SharingStrategyMPS
+	}
+
+	if s.TimeSlicing.isReplicated() {
+		return SharingStrategyTimeSlicing
+	}
+	return SharingStrategyNone
+}
+
+// ReplicatedResources returns the resources associated with the active sharing strategy.
+func (s *Sharing) ReplicatedResources() *ReplicatedResources {
+	if s.MPS != nil {
+		return s.MPS
+	}
+	return &s.TimeSlicing
+}
diff --git a/pkg/nvidia-plugin/api/config/v1/strategy.go b/pkg/nvidia-plugin/api/config/v1/strategy.go
new file mode 100644
index 000000000..4d39581c9
--- /dev/null
+++ b/pkg/nvidia-plugin/api/config/v1/strategy.go
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c), NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package v1
+
+import (
+	"fmt"
+	"strings"
+)
+
+// DeviceListStrategies defines which strategies are enabled and should
+// be used when passing the device list to the container runtime.
+type DeviceListStrategies map[string]bool
+
+// NewDeviceListStrategies constructs a DeviceListStrategies set from the given strategy names.
+func NewDeviceListStrategies(strategies []string) (DeviceListStrategies, error) {
+	ret := map[string]bool{
+		DeviceListStrategyEnvVar:         false,
+		DeviceListStrategyVolumeMounts:   false,
+		DeviceListStrategyCDIAnnotations: false,
+		DeviceListStrategyCDICRI:         false,
+	}
+	for _, s := range strategies {
+		if _, ok := ret[s]; !ok {
+			return nil, fmt.Errorf("invalid strategy: %v", s)
+		}
+		ret[s] = true
+	}
+
+	return DeviceListStrategies(ret), nil
+}
+
+// Includes returns whether the given strategy is present in the set of strategies.
+func (s DeviceListStrategies) Includes(strategy string) bool {
+	return s[strategy]
+}
+
+// AnyCDIEnabled returns whether any of the strategies being used require CDI.
+func (s DeviceListStrategies) AnyCDIEnabled() bool {
+	for k, v := range s {
+		if strings.HasPrefix(k, "cdi-") && v {
+			return true
+		}
+	}
+	return false
+}
+
+// AllCDIEnabled returns whether all strategies being used require CDI.
+func (s DeviceListStrategies) AllCDIEnabled() bool {
+	for k, v := range s {
+		if !strings.HasPrefix(k, "cdi-") && v {
+			return false
+		}
+	}
+	return true
+}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/main.go b/pkg/nvidia-plugin/mps-control-daemon/main.go
new file mode 100644
index 000000000..29259c29c
--- /dev/null
+++ b/pkg/nvidia-plugin/mps-control-daemon/main.go
@@ -0,0 +1,255 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package main
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os"
+	"syscall"
+	"time"
+
+	"github.com/urfave/cli/v2"
+	"k8s.io/klog/v2"
+
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
+	nvinfo "github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+
+	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/mps-control-daemon/mount"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/mps-control-daemon/mps"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/info"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/logger"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/watch"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+)
+
+// Config represents a collection of config options for the device plugin.
+type Config struct {
+	configFile string
+
+	// flags stores the CLI flags for later processing.
+	flags []cli.Flag
+}
+
+func main() {
+	config := &Config{}
+
+	c := cli.NewApp()
+	c.Name = "NVIDIA MPS Control Daemon"
+	c.Version = info.GetVersionString()
+	c.Action = func(ctx *cli.Context) error {
+		return start(ctx, config)
+	}
+	c.Commands = []*cli.Command{
+		mount.NewCommand(),
+	}
+
+	config.flags = []cli.Flag{
+		&cli.StringFlag{
+			Name:        "config-file",
+			Usage:       "the path to a config file as an alternative to command line options or environment variables",
+			Destination: &config.configFile,
+			EnvVars:     []string{"CONFIG_FILE"},
+		},
+		&cli.StringFlag{
+			Name:    "mig-strategy",
+			Value:   spec.MigStrategyNone,
+			Usage:   "the desired strategy for exposing MIG devices on GPUs that support it:\n\t\t[none | single | mixed]",
+			EnvVars: []string{"MIG_STRATEGY"},
+		},
+	}
+	c.Flags = config.flags
+
+	klog.InfoS(c.Name, "version", c.Version)
+	err := c.Run(os.Args)
+	if err != nil {
+		klog.Error(err)
+		os.Exit(1)
+	}
+}
+
+// TODO: This needs to do similar validation to the plugin.
+func validateFlags(config *spec.Config) error {
+	return nil
+}
+
+// loadConfig loads the config from the spec file.
+func (cfg *Config) loadConfig(c *cli.Context) (*spec.Config, error) {
+	config, err := spec.NewConfig(c, cfg.flags)
+	if err != nil {
+		return nil, fmt.Errorf("unable to finalize config: %w", err)
+	}
+	err = validateFlags(config)
+	if err != nil {
+		return nil, fmt.Errorf("unable to validate flags: %w", err)
+	}
+	config.Flags.GFD = nil
+
+	return config, nil
+}
+
+// loadNvidiaConfig loads the config from the spec file and wraps it in an nvidia.DeviceConfig.
+func (cfg *Config) loadNvidiaConfig(c *cli.Context) (*nvidia.DeviceConfig, error) {
+	devcfg := &nvidia.DeviceConfig{}
+
+	config, err := spec.NewConfig(c, cfg.flags)
+	if err != nil {
+		return nil, fmt.Errorf("unable to finalize config: %w", err)
+	}
+	err = validateFlags(config)
+	if err != nil {
+		return nil, fmt.Errorf("unable to validate flags: %w", err)
+	}
+	config.Flags.GFD = nil
+	// Set the config in the device config.
+	devcfg.Config = config
+	return devcfg, nil
+}
+
+func start(c *cli.Context, cfg *Config) error {
+	klog.Info("Starting OS watcher.")
+	sigs := watch.Signals(syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
+	var started bool
+	var restartTimeout <-chan time.Time
+	var daemons []*mps.Daemon
+restart:
+	// If we are restarting, stop daemons from previous run.
+	if started {
+		err := stopDaemons(daemons...)
+		if err != nil {
+			return fmt.Errorf("error stopping plugins from previous run: %v", err)
+		}
+	}
+
+	klog.Info("Starting Daemons.")
+	daemons, restartDaemons, err := startDaemons(c, cfg)
+	if err != nil {
+		return fmt.Errorf("error starting plugins: %v", err)
+	}
+	started = true
+
+	if restartDaemons {
+		klog.Infof("Failed to start one or more MPS deamons. Retrying in 30s...")
+		restartTimeout = time.After(30 * time.Second)
+	}
+
+	// Start an infinite loop, waiting for several indicators to either log
+	// some messages, trigger a restart of the plugins, or exit the program.
+	for {
+		select {
+		// If the restart timeout has expired, then restart the plugins
+		case <-restartTimeout:
+			goto restart
+
+		// Watch for any signals from the OS. On SIGHUP, restart this loop,
+		// restarting all of the plugins in the process. On all other
+		// signals, exit the loop and exit the program.
+		case s := <-sigs:
+			switch s {
+			case syscall.SIGHUP:
+				klog.Info("Received SIGHUP, restarting.")
+				goto restart
+			default:
+				klog.Infof("Received signal \"%v\", shutting down.", s)
+				goto exit
+			}
+		}
+	}
+exit:
+	if err := stopDaemons(daemons...); err != nil {
+		return fmt.Errorf("error stopping daemons: %v", err)
+	}
+	return nil
+}
+
+// startDaemons loads the config, builds the MPS daemons for it and starts each of them.
+// The bool result tells the caller to retry later; /mps/.ready is created once all daemons started.
+func startDaemons(c *cli.Context, cfg *Config) ([]*mps.Daemon, bool, error) {
+	// Load the configuration file
+	klog.Info("Loading configuration.")
+	config, err := cfg.loadNvidiaConfig(c)
+	if err != nil {
+		return nil, false, fmt.Errorf("unable to load config: %w", err)
+	}
+	spec.DisableResourceNamingInConfig(logger.ToKlog, config.Config)
+
+	nvmllib := nvml.New()
+	devicelib := device.New(nvmllib)
+	infolib := nvinfo.New(
+		nvinfo.WithNvmlLib(nvmllib),
+		nvinfo.WithDeviceLib(devicelib),
+	)
+
+	// Update the configuration file with default resources.
+	klog.Info("Updating config with default resource matching patterns.")
+	err = rm.AddDefaultResourcesToConfig(infolib, nvmllib, devicelib, config.Config)
+	if err != nil {
+		return nil, false, fmt.Errorf("unable to add default resources to config: %w", err)
+	}
+
+	// Print the config to the output.
+	configJSON, err := json.MarshalIndent(config, "", "  ")
+	if err != nil {
+		return nil, false, fmt.Errorf("failed to marshal config to JSON: %w", err)
+	}
+	klog.Infof("\nRunning with config:\n%v", string(configJSON))
+
+	// Get the set of daemons.
+	// Note that a daemon is only created for resources with at least one device.
+	klog.Info("Retrieving MPS daemons.")
+	mpsDaemons, err := mps.NewDaemons(infolib, nvmllib, devicelib, mps.WithConfig(config))
+	if err != nil {
+		return nil, false, fmt.Errorf("error getting daemons: %w", err)
+	}
+
+	if len(mpsDaemons) == 0 {
+		klog.Info("No devices are configured for MPS sharing; Waiting indefinitely.")
+	}
+
+	// Loop through all MPS daemons and start them.
+	// A start failure is logged, not returned: the caller is asked to restart all daemons instead.
+	for _, mpsDaemon := range mpsDaemons {
+		if err := mpsDaemon.Start(); err != nil {
+			klog.Errorf("Failed to start MPS daemon: %v", err)
+			return mpsDaemons, true, nil
+		}
+	}
+	readyFile, err := os.Create("/mps/.ready")
+	if err != nil {
+		return mpsDaemons, true, fmt.Errorf("failed to create .ready file: %w", err)
+	}
+	defer readyFile.Close()
+
+	return mpsDaemons, false, nil
+}
+
+func stopDaemons(mpsDaemons ...*mps.Daemon) error {
+	if err := os.Remove("/mps/.ready"); err != nil {
+		klog.Warningf("Failed to remove .ready file: %v", err)
+	}
+	klog.Info("Stopping MPS daemons.")
+	var errs error
+	for _, p := range mpsDaemons {
+		errs = errors.Join(errs, p.Stop())
+	}
+	return errs
+}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mount/mount-shm.go b/pkg/nvidia-plugin/mps-control-daemon/mount/mount-shm.go
new file mode 100644
index 000000000..83825e812
--- /dev/null
+++ b/pkg/nvidia-plugin/mps-control-daemon/mount/mount-shm.go
@@ -0,0 +1,108 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package mount
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"os/exec"
+	"strconv"
+	"strings"
+
+	"github.com/urfave/cli/v2"
+	"k8s.io/klog/v2"
+	"k8s.io/mount-utils"
+)
+
+// NewCommand constructs a mount command.
+func NewCommand() *cli.Command {
+	c := cli.Command{
+		Name:   "mount-shm",
+		Usage:  "Set up the /dev/shm mount required by the MPS daemon",
+		Action: mountShm,
+	}
+
+	return &c
+}
+
+// mountShm creates a tmpfs mount at /mps/shm to be used by the mps control daemon.
+func mountShm(c *cli.Context) error {
+	mountExecutable, err := exec.LookPath("mount")
+	if err != nil {
+		return fmt.Errorf("error finding 'mount' executable: %w", err)
+	}
+	mounter := mount.New(mountExecutable)
+
+	// TODO: /mps should be configurable.
+	shmDir := "/mps/shm"
+	err = mount.CleanupMountPoint(shmDir, mounter, true)
+	if err != nil {
+		return fmt.Errorf("error unmounting %v: %w", shmDir, err)
+	}
+
+	if err := os.MkdirAll(shmDir, 0755); err != nil {
+		return fmt.Errorf("error creating directory %v: %w", shmDir, err)
+	}
+
+	sizeArg := fmt.Sprintf("size=%v", getDefaultShmSize())
+	mountOptions := []string{"rw", "nosuid", "nodev", "noexec", "relatime", sizeArg}
+	if err := mounter.Mount("shm", shmDir, "tmpfs", mountOptions); err != nil {
+		return fmt.Errorf("error mounting %v as tmpfs: %w", shmDir, err)
+	}
+
+	return nil
+}
+
+// getDefaultShmSize returns the default size for the tmpfs to be created.
+// This reads /proc/meminfo to get the total memory to calculate this. If this
+// fails a fallback size of 65536k is used.
+func getDefaultShmSize() string {
+	const fallbackSize = "65536k"
+
+	meminfo, err := os.Open("/proc/meminfo")
+	if err != nil {
+		klog.ErrorS(err, "failed to open /proc/meminfo")
+		return fallbackSize
+	}
+	defer func() {
+		_ = meminfo.Close()
+	}()
+
+	scanner := bufio.NewScanner(meminfo)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if !strings.HasPrefix(line, "MemTotal:") {
+			continue
+		}
+
+		parts := strings.SplitN(strings.TrimSpace(strings.TrimPrefix(line, "MemTotal:")), " ", 2)
+		memTotal, err := strconv.Atoi(parts[0])
+		if err != nil {
+			klog.ErrorS(err, "could not convert MemTotal to an integer")
+			return fallbackSize
+		}
+
+		var unit string
+		if len(parts) == 2 {
+			unit = string(parts[1][0])
+		}
+
+		return fmt.Sprintf("%d%s", memTotal/2, unit)
+	}
+	return fallbackSize
+}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/daemon.go b/pkg/nvidia-plugin/mps-control-daemon/mps/daemon.go
new file mode 100644
index 000000000..5d23c61ae
--- /dev/null
+++ b/pkg/nvidia-plugin/mps-control-daemon/mps/daemon.go
@@ -0,0 +1,280 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package mps
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+
+	"github.com/opencontainers/selinux/go-selinux"
+	"k8s.io/klog/v2"
+
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
+)
+
+type computeMode string
+
+const (
+	mpsControlBin = "nvidia-cuda-mps-control"
+
+	computeModeExclusiveProcess = computeMode("EXCLUSIVE_PROCESS")
+	computeModeDefault          = computeMode("DEFAULT")
+
+	unprivilegedContainerSELinuxLabel = "system_u:object_r:container_file_t:s0"
+)
+
+// Daemon represents an MPS daemon.
+// It is associated with a specific kubernetes resource and is responsible for
+// starting and stopping the daemon as well as ensuring that the memory and
+// thread limits are set for the devices that the resource makes available.
+type Daemon struct {
+	rm rm.ResourceManager
+	// root represents the root at which the files and folders controlled by the
+	// daemon are created. These include the log and pipe directories.
+	root Root
+	// logTailer tails the MPS control daemon logs.
+	logTailer *tailer
+}
+
+// NewDaemon creates an MPS daemon instance.
+func NewDaemon(rm rm.ResourceManager, root Root) *Daemon {
+	return &Daemon{
+		rm:   rm,
+		root: root,
+	}
+}
+
+// Devices returns the list of devices under the control of this MPS daemon.
+func (d *Daemon) Devices() rm.Devices {
+	return d.rm.Devices()
+}
+
+type envvars map[string]string
+
+func (e envvars) toSlice() []string {
+	var envs []string
+	for k, v := range e {
+		envs = append(envs, k+"="+v)
+	}
+	return envs
+}
+
+// EnvVars returns the environment variables required for the daemon.
+// These should be passed to clients consuming the device shared using MPS.
+// TODO: Set CUDA_VISIBLE_DEVICES to include only the devices for this resource type.
+func (d *Daemon) EnvVars() envvars {
+	return map[string]string{
+		"CUDA_MPS_PIPE_DIRECTORY": d.PipeDir(),
+		"CUDA_MPS_LOG_DIRECTORY":  d.LogDir(),
+	}
+}
+
+// Start starts the MPS daemon as a background process.
+func (d *Daemon) Start() error {
+	if err := d.setComputeMode(computeModeExclusiveProcess); err != nil {
+		return fmt.Errorf("error setting compute mode %v: %w", computeModeExclusiveProcess, err)
+	}
+
+	klog.InfoS("Staring MPS daemon", "resource", d.rm.Resource())
+
+	pipeDir := d.PipeDir()
+	if err := os.MkdirAll(pipeDir, 0755); err != nil {
+		return fmt.Errorf("error creating directory %v: %w", pipeDir, err)
+	}
+
+	if err := setSELinuxContext(pipeDir, unprivilegedContainerSELinuxLabel); err != nil {
+		return fmt.Errorf("error setting SELinux context: %w", err)
+	}
+
+	logDir := d.LogDir()
+	if err := os.MkdirAll(logDir, 0755); err != nil {
+		return fmt.Errorf("error creating directory %v: %w", logDir, err)
+	}
+
+	mpsDaemon := exec.Command(mpsControlBin, "-d")
+	mpsDaemon.Env = append(mpsDaemon.Env, d.EnvVars().toSlice()...)
+	if err := mpsDaemon.Run(); err != nil {
+		return err
+	}
+
+	for index, limit := range d.perDevicePinnedDeviceMemoryLimits() {
+		_, err := d.EchoPipeToControl(fmt.Sprintf("set_default_device_pinned_mem_limit %s %s", index, limit))
+		if err != nil {
+			return fmt.Errorf("error setting pinned memory limit for device %v: %w", index, err)
+		}
+	}
+	if threadPercentage := d.activeThreadPercentage(); threadPercentage != "" {
+		_, err := d.EchoPipeToControl(fmt.Sprintf("set_default_active_thread_percentage %s", threadPercentage))
+		if err != nil {
+			return fmt.Errorf("error setting active thread percentage: %w", err)
+		}
+	}
+
+	statusFile, err := os.Create(d.startedFile())
+	if err != nil {
+		return err
+	}
+	defer statusFile.Close()
+
+	d.logTailer = newTailer(filepath.Join(logDir, "control.log"))
+	klog.InfoS("Starting log tailer", "resource", d.rm.Resource())
+	if err := d.logTailer.Start(); err != nil {
+		klog.ErrorS(err, "Could not start tail command on control.log; ignoring logs")
+	}
+
+	return nil
+}
+
+func setSELinuxContext(path string, context string) error {
+	_, err := os.Stat("/sys/fs/selinux")
+	if err != nil && errors.Is(err, os.ErrNotExist) {
+		klog.InfoS("SELinux disabled, not updating context", "path", path)
+		return nil
+	} else if err != nil {
+		return fmt.Errorf("error checking if SELinux is enabled: %w", err)
+	}
+
+	klog.InfoS("SELinux enabled, setting context", "path", path, "context", context)
+	return selinux.Chcon(path, context, true)
+}
+
+// Stop quits the MPS daemon, stops its log tailer, resets the compute mode and removes the started file and log directory.
+func (d *Daemon) Stop() error {
+	_, err := d.EchoPipeToControl("quit")
+	if err != nil {
+		return fmt.Errorf("error sending quit message: %w", err)
+	}
+	klog.InfoS("Stopped MPS control daemon", "resource", d.rm.Resource())
+
+	// The tailer is only created at the end of Start, so it is nil if Start failed early.
+	if d.logTailer != nil {
+		err = d.logTailer.Stop()
+		klog.InfoS("Stopped log tailer", "resource", d.rm.Resource(), "error", err)
+	}
+	if err := d.setComputeMode(computeModeDefault); err != nil {
+		return fmt.Errorf("error setting compute mode %v: %w", computeModeDefault, err)
+	}
+	// os.Remove returns a *PathError, so the sentinel has to be matched with errors.Is.
+	if err := os.Remove(d.startedFile()); err != nil && !errors.Is(err, os.ErrNotExist) {
+		return fmt.Errorf("failed to remove started file: %w", err)
+	}
+	logDir := d.LogDir()
+	if err := os.RemoveAll(logDir); err != nil {
+		klog.ErrorS(err, "Failed to remove log directory", "path", logDir)
+	}
+	return nil
+}
+
+func (d *Daemon) LogDir() string {
+	return d.root.LogDir(d.rm.Resource())
+}
+
+func (d *Daemon) PipeDir() string {
+	return d.root.PipeDir(d.rm.Resource())
+}
+
+func (d *Daemon) ShmDir() string {
+	return "/dev/shm"
+}
+
+func (d *Daemon) startedFile() string {
+	return d.root.startedFile(d.rm.Resource())
+}
+
+// AssertHealthy checks that the MPS control daemon is healthy.
+func (d *Daemon) AssertHealthy() error {
+	_, err := d.EchoPipeToControl("get_default_active_thread_percentage")
+	return err
+}
+
+// EchoPipeToControl sends the specified command to the MPS control daemon.
+func (d *Daemon) EchoPipeToControl(command string) (string, error) {
+	var out bytes.Buffer
+	reader, writer := io.Pipe()
+	defer writer.Close()
+	defer reader.Close()
+
+	mpsDaemon := exec.Command(mpsControlBin)
+	mpsDaemon.Env = append(mpsDaemon.Env, d.EnvVars().toSlice()...)
+
+	mpsDaemon.Stdin = reader
+	mpsDaemon.Stdout = &out
+
+	if err := mpsDaemon.Start(); err != nil {
+		return "", fmt.Errorf("failed to start NVIDIA MPS command: %w", err)
+	}
+
+	if _, err := writer.Write([]byte(command)); err != nil {
+		return "", fmt.Errorf("failed to write message to pipe: %w", err)
+	}
+	_ = writer.Close()
+
+	if err := mpsDaemon.Wait(); err != nil {
+		return "", fmt.Errorf("failed to send command to MPS daemon: %w", err)
+	}
+	return out.String(), nil
+}
+
+func (d *Daemon) setComputeMode(mode computeMode) error {
+	for _, uuid := range d.Devices().GetUUIDs() {
+		cmd := exec.Command(
+			"nvidia-smi",
+			"-i", uuid,
+			"-c", string(mode))
+		output, err := cmd.CombinedOutput()
+		if err != nil {
+			klog.Errorf("\n%v", string(output))
+			return fmt.Errorf("error running nvidia-smi: %w", err)
+		}
+	}
+	return nil
+}
+
+// perDevicePinnedDeviceMemoryLimits returns the pinned memory limit ("<n>M") keyed by device index.
+func (d *Daemon) perDevicePinnedDeviceMemoryLimits() map[string]string {
+	totalMemoryInBytesPerDevice := make(map[string]uint64)
+	replicasPerDevice := make(map[string]uint64)
+	for _, device := range d.Devices() {
+		index := device.Index
+		totalMemoryInBytesPerDevice[index] = device.TotalMemory
+		replicasPerDevice[index]++
+	}
+	// Split each device's memory evenly between its replicas; devices reporting no memory get no limit.
+	limits := make(map[string]string)
+	for index, totalMemory := range totalMemoryInBytesPerDevice {
+		if totalMemory == 0 {
+			continue
+		}
+		replicas := replicasPerDevice[index]
+		limits[index] = fmt.Sprintf("%vM", totalMemory/replicas/1024/1024)
+	}
+	return limits
+}
+
+func (d *Daemon) activeThreadPercentage() string {
+	if len(d.Devices()) == 0 {
+		return ""
+	}
+	// Entries per unique UUID; presumably each entry is one replica, spread evenly -- TODO confirm.
+	replicasPerDevice := len(d.Devices()) / len(d.Devices().GetUUIDs())
+	return fmt.Sprintf("%d", 100/replicasPerDevice)
+}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/device.go b/pkg/nvidia-plugin/mps-control-daemon/mps/device.go
new file mode 100644
index 000000000..bd8b1bf3c
--- /dev/null
+++ b/pkg/nvidia-plugin/mps-control-daemon/mps/device.go
@@ -0,0 +1,55 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package mps
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+
+	"golang.org/x/mod/semver"
+
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
+)
+
+var errInvalidDevice = errors.New("invalid device")
+
+// mpsDevice represents an MPS-specific alias for an rm.Device.
+type mpsDevice rm.Device
+
+// assertReplicas checks whether the number of replicas specified is valid.
+func (d *mpsDevice) assertReplicas() error {
+	maxClients := d.maxClients()
+	if d.Replicas > maxClients {
+		return fmt.Errorf("%w maximum allowed replicas exceeded: %d > %d", errInvalidDevice, d.Replicas, maxClients)
+	}
+	return nil
+}
+
+// maxClients returns the maximum number of clients supported by an MPS server.
+func (d *mpsDevice) maxClients() int {
+	if d.isAtLeastVolta() {
+		return 48
+	}
+	return 16
+}
+
+// isAtLeastVolta checks whether the specified device is a volta device or newer.
+func (d *mpsDevice) isAtLeastVolta() bool {
+	vCc := "v" + strings.TrimPrefix(d.ComputeCapability, "v")
+	return semver.Compare(semver.Canonical(vCc), semver.Canonical("v7.5")) >= 0
+}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/device_test.go b/pkg/nvidia-plugin/mps-control-daemon/mps/device_test.go
new file mode 100644
index 000000000..17cef28ea
--- /dev/null
+++ b/pkg/nvidia-plugin/mps-control-daemon/mps/device_test.go
@@ -0,0 +1,112 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package mps
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestDevice(t *testing.T) {
+	testCases := []struct {
+		description            string
+		input                  mpsDevice
+		expectedIsAtLeastVolta bool
+		expectedMaxClients     int
+		expectedAssertReplicas error
+	}{
+		{
+			description: "leading v ignored",
+			input: mpsDevice{
+				ComputeCapability: "v7.5",
+			},
+			expectedIsAtLeastVolta: true,
+			expectedMaxClients:     48,
+		},
+		{
+			description: "no-leading v supported",
+			input: mpsDevice{
+				ComputeCapability: "7.5",
+			},
+			expectedIsAtLeastVolta: true,
+			expectedMaxClients:     48,
+		},
+		{
+			description: "pre-volta clients",
+			input: mpsDevice{
+				ComputeCapability: "7.0",
+			},
+			expectedIsAtLeastVolta: false,
+			expectedMaxClients:     16,
+		},
+		{
+			description: "post-volta clients",
+			input: mpsDevice{
+				ComputeCapability: "9.0",
+			},
+			expectedIsAtLeastVolta: true,
+			expectedMaxClients:     48,
+		},
+		{
+			description: "pre-volta clients exceeded",
+			input: mpsDevice{
+				ComputeCapability: "7.0",
+				Replicas:          29,
+			},
+			expectedIsAtLeastVolta: false,
+			expectedMaxClients:     16,
+			expectedAssertReplicas: errInvalidDevice,
+		},
+		{
+			description: "post-volta clients exceeded",
+			input: mpsDevice{
+				ComputeCapability: "9.0",
+				Replicas:          49,
+			},
+			expectedIsAtLeastVolta: true,
+			expectedMaxClients:     48,
+			expectedAssertReplicas: errInvalidDevice,
+		},
+		{
+			description: "pre-volta clients max",
+			input: mpsDevice{
+				ComputeCapability: "7.0",
+				Replicas:          16,
+			},
+			expectedIsAtLeastVolta: false,
+			expectedMaxClients:     16,
+		},
+		{
+			description: "post-volta clients max",
+			input: mpsDevice{
+				ComputeCapability: "9.0",
+				Replicas:          48,
+			},
+			expectedIsAtLeastVolta: true,
+			expectedMaxClients:     48,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			require.Equal(t, tc.expectedIsAtLeastVolta, tc.input.isAtLeastVolta())
+			require.Equal(t, tc.expectedMaxClients, tc.input.maxClients())
+			require.ErrorIs(t, tc.input.assertReplicas(), tc.expectedAssertReplicas)
+		})
+	}
+}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/log-tailer.go b/pkg/nvidia-plugin/mps-control-daemon/mps/log-tailer.go
new file mode 100644
index 000000000..d9fb87b84
--- /dev/null
+++ b/pkg/nvidia-plugin/mps-control-daemon/mps/log-tailer.go
@@ -0,0 +1,69 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package mps
+
+import (
+	"context"
+	"os"
+	"os/exec"
+)
+
+// tailer tails the contents of a file.
+type tailer struct {
+	filename string
+	cmd      *exec.Cmd
+	cancel   context.CancelFunc
+}
+
+// newTailer creates a tailer.
+func newTailer(filename string) *tailer {
+	return &tailer{
+		filename: filename,
+	}
+}
+
+// Start starts tailing the specified filename.
+func (t *tailer) Start() error {
+	ctx, cancel := context.WithCancel(context.Background())
+	t.cancel = cancel
+
+	//nolint:gosec // G204: Subprocess launched with a potential tainted input or cmd arguments (gosec)
+	cmd := exec.CommandContext(ctx, "tail", "-n", "+1", "-f", t.filename)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+
+	if err := cmd.Start(); err != nil {
+		return err
+	}
+	t.cmd = cmd
+	return nil
+}
+
+// Stop cancels the tail command's context and then waits for the command to
+// exit, if it was started. Calling Stop on a nil tailer is a no-op.
+func (t *tailer) Stop() error {
+	if t == nil {
+		return nil
+	}
+	if t.cancel != nil {
+		t.cancel()
+	}
+	if t.cmd == nil {
+		return nil
+	}
+	return t.cmd.Wait()
+}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/manager.go b/pkg/nvidia-plugin/mps-control-daemon/mps/manager.go
new file mode 100644
index 000000000..719a358e6
--- /dev/null
+++ b/pkg/nvidia-plugin/mps-control-daemon/mps/manager.go
@@ -0,0 +1,112 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package mps
+
+import (
+	"fmt"
+
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+	"k8s.io/klog/v2"
+
+	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
+)
+
+type Manager interface {
+	Daemons() ([]*Daemon, error)
+}
+
+type manager struct {
+	infolib   info.Interface
+	nvmllib   nvml.Interface
+	devicelib device.Interface
+	config    *nvidia.DeviceConfig
+}
+
+type nullManager struct{}
+
+// NewDaemons creates the required set of MPS daemons for the specified options.
+func NewDaemons(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, opts ...Option) ([]*Daemon, error) {
+	manager, err := New(infolib, nvmllib, devicelib, opts...)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create MPS manager: %w", err)
+	}
+	return manager.Daemons()
+}
+
+// New creates a manager for MPS daemons.
+// If MPS is not configured, a manager is returned that manages no daemons.
+func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, opts ...Option) (Manager, error) {
+	m := &manager{
+		infolib:   infolib,
+		nvmllib:   nvmllib,
+		devicelib: devicelib,
+	}
+	for _, opt := range opts {
+		opt(m)
+	}
+
+	if strategy := m.config.Sharing.SharingStrategy(); strategy != spec.SharingStrategyMPS {
+		klog.InfoS("Sharing strategy is not MPS; skipping MPS manager creation", "strategy", strategy)
+		return &nullManager{}, nil
+	}
+
+	return m, nil
+}
+
+// Daemons returns one MPS daemon per resource manager that has devices and whose device IDs carry annotations.
+func (m *manager) Daemons() ([]*Daemon, error) {
+	resourceManagers, err := rm.NewNVMLResourceManagers(m.infolib, m.nvmllib, m.devicelib, m.config)
+	if err != nil {
+		return nil, err
+	}
+	var daemons []*Daemon
+	for _, resourceManager := range resourceManagers {
+		// We don't create daemons if there are no devices associated with the resource manager.
+		if len(resourceManager.Devices()) == 0 {
+			klog.InfoS("No devices associated with resource", "resource", resourceManager.Resource())
+			continue
+		}
+		// Check if the resources are shared.
+		// TODO: We should add a more explicit check for MPS specifically
+		if !rm.AnnotatedIDs(resourceManager.Devices().GetIDs()).AnyHasAnnotations() {
+			klog.InfoS("Resource is not shared", "resource", resourceManager.Resource())
+			continue
+		}
+		// Validate replicas. NOTE(review): `continue` only skips the MIG device; the daemon is still created.
+		for _, rmDevice := range resourceManager.Devices() {
+			if rmDevice.IsMigDevice() {
+				klog.Warning("MPS sharing is not supported for MIG devices; skipping daemon creation")
+				continue
+			}
+			if err := (*mpsDevice)(rmDevice).assertReplicas(); err != nil {
+				return nil, fmt.Errorf("invalid MPS configuration: %w", err)
+			}
+		}
+		daemons = append(daemons, NewDaemon(resourceManager, ContainerRoot))
+	}
+
+	return daemons, nil
+}
+
+// Daemons always returns an empty slice for a nullManager.
+func (m *nullManager) Daemons() ([]*Daemon, error) {
+	return nil, nil
+}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/options.go b/pkg/nvidia-plugin/mps-control-daemon/mps/options.go
new file mode 100644
index 000000000..ca97d122f
--- /dev/null
+++ b/pkg/nvidia-plugin/mps-control-daemon/mps/options.go
@@ -0,0 +1,29 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package mps
+
+import "github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+
+// Option defines a functional option that mutates an MPS manager when it is applied to it.
+type Option func(*manager)
+
+// WithConfig returns an Option that stores the given config pointer on the MPS manager.
+func WithConfig(config *nvidia.DeviceConfig) Option {
+	return func(mgr *manager) {
+		mgr.config = config
+	}
+}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/root.go b/pkg/nvidia-plugin/mps-control-daemon/mps/root.go
new file mode 100644
index 000000000..9c2e105f8
--- /dev/null
+++ b/pkg/nvidia-plugin/mps-control-daemon/mps/root.go
@@ -0,0 +1,59 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package mps
+
+import (
+	"path/filepath"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+)
+
+const (
+	ContainerRoot = Root("/mps")
+)
+
+// Root represents an MPS root directory, stored as a plain path string.
+// This is where per-resource pipe and log directories are created.
+// For containerised applications the host root is typically mounted to /mps in the container.
+type Root string
+
+// LogDir returns the per-resource log dir ("<root>/<resourceName>/log") for the specified root.
+func (r Root) LogDir(resourceName spec.ResourceName) string {
+	return r.Path(string(resourceName), "log")
+}
+
+// PipeDir returns the per-resource pipe dir ("<root>/<resourceName>/pipe") for the specified root.
+func (r Root) PipeDir(resourceName spec.ResourceName) string {
+	return r.Path(string(resourceName), "pipe")
+}
+
+// ShmDir returns the shm dir associated with the root.
+// The resource name is ignored because the shm dir is the same for all resources.
+func (r Root) ShmDir(_ spec.ResourceName) string {
+	return r.Path("shm")
+}
+
+// startedFile returns the path of the per-resource ".started" file ("<root>/<resourceName>/.started").
+func (r Root) startedFile(resourceName spec.ResourceName) string {
+	return r.Path(string(resourceName), ".started")
+}
+
+// Path joins the given parts onto the MPS root and returns the resulting path.
+func (r Root) Path(parts ...string) string {
+	elems := append(make([]string, 0, len(parts)+1), string(r))
+	return filepath.Join(append(elems, parts...)...)
+}
diff --git a/pkg/nvidia-plugin/pkg/cdi/api.go b/pkg/nvidia-plugin/pkg/cdi/api.go
new file mode 100644
index 000000000..29ea99ce7
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/cdi/api.go
@@ -0,0 +1,31 @@
+/**
+# Copyright (c) NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package cdi
+
+import "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
+
+// Interface provides the API to the 'cdi' package
+//
+//go:generate moq -stub -out api_mock.go . Interface
+type Interface interface {
+	CreateSpecFile() error               // generates and saves the CDI spec file(s)
+	QualifiedName(string, string) string // arguments are (class, id) in the implementations
+}
+
+type cdiSpecGenerator interface { // per-class CDI spec source, stored in cdiHandler.cdilibs
+	GetSpec() (spec.Interface, error)
+}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/cdi/api_mock.go b/pkg/nvidia-plugin/pkg/cdi/api_mock.go
similarity index 100%
rename from pkg/device-plugin/nvidiadevice/nvinternal/cdi/api_mock.go
rename to pkg/nvidia-plugin/pkg/cdi/api_mock.go
diff --git a/pkg/nvidia-plugin/pkg/cdi/cdi.go b/pkg/nvidia-plugin/pkg/cdi/cdi.go
new file mode 100644
index 000000000..b3227d437
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/cdi/cdi.go
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cdi
+
+import (
+	"fmt"
+	"path/filepath"
+	"strings"
+
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+	"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
+	"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
+	transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
+	"github.com/sirupsen/logrus"
+	"k8s.io/klog/v2"
+	cdiapi "tags.cncf.io/container-device-interface/pkg/cdi"
+	cdiparser "tags.cncf.io/container-device-interface/pkg/parser"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/imex"
+)
+
+const (
+	cdiRoot = "/var/run/cdi"
+)
+
+// cdiHandler creates CDI specs for devices associated with the device plugin
+type cdiHandler struct {
+	infolib   info.Interface
+	nvmllib   nvml.Interface
+	devicelib device.Interface
+
+	logger           *logrus.Logger
+	driverRoot       string
+	devRoot          string
+	targetDriverRoot string
+	targetDevRoot    string
+	nvidiaCTKPath    string
+	vendor           string
+	deviceIDStrategy string
+
+	deviceListStrategies spec.DeviceListStrategies
+
+	gdsEnabled   bool
+	mofedEnabled bool
+
+	imexChannels imex.Channels
+
+	cdilibs map[string]cdiSpecGenerator
+}
+
+var _ Interface = &cdiHandler{}
+
+// New constructs a new instance of the 'cdi' interface from the given libraries and options.
+func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, opts ...Option) (Interface, error) {
+	c := &cdiHandler{
+		infolib:   infolib,
+		nvmllib:   nvmllib,
+		devicelib: devicelib,
+	}
+	for _, opt := range opts {
+		opt(c)
+	}
+	// A null handler is returned when no CDI strategy is enabled or NVML is not available.
+	if !c.deviceListStrategies.AnyCDIEnabled() {
+		return &null{}, nil
+	}
+	hasNVML, _ := infolib.HasNvml()
+	if !hasNVML {
+		klog.Warning("No valid resources detected, creating a null CDI handler")
+		return &null{}, nil
+	}
+	// Fill in defaults for every option that was not supplied.
+	if c.logger == nil {
+		c.logger = logrus.StandardLogger()
+	}
+	if c.deviceIDStrategy == "" {
+		c.deviceIDStrategy = "uuid"
+	}
+	if c.driverRoot == "" {
+		c.driverRoot = "/"
+	}
+	if c.devRoot == "" {
+		c.devRoot = c.driverRoot
+	}
+	if c.targetDriverRoot == "" {
+		c.targetDriverRoot = c.driverRoot
+	}
+	if c.targetDevRoot == "" {
+		c.targetDevRoot = c.devRoot
+	}
+
+	deviceNamer, err := nvcdi.NewDeviceNamer(c.deviceIDStrategy)
+	if err != nil {
+		return nil, err
+	}
+
+	c.cdilibs = make(map[string]cdiSpecGenerator)
+	// The "gpu" class is always generated; the remaining classes depend on the options.
+	c.cdilibs["gpu"], err = nvcdi.New(
+		nvcdi.WithInfoLib(c.infolib),
+		nvcdi.WithNvmlLib(c.nvmllib),
+		nvcdi.WithDeviceLib(c.devicelib),
+		nvcdi.WithLogger(c.logger),
+		nvcdi.WithNVIDIACDIHookPath(c.nvidiaCTKPath),
+		nvcdi.WithDriverRoot(c.driverRoot),
+		nvcdi.WithDevRoot(c.devRoot),
+		nvcdi.WithDeviceNamers(deviceNamer),
+		nvcdi.WithVendor(c.vendor),
+		nvcdi.WithClass("gpu"),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create nvcdi library: %w", err)
+	}
+
+	if len(c.imexChannels) > 0 {
+		c.cdilibs["imex-channel"] = c.newImexChannelSpecGenerator()
+	}
+
+	var additionalModes []string
+	if c.gdsEnabled {
+		additionalModes = append(additionalModes, "gds")
+	}
+	if c.mofedEnabled {
+		additionalModes = append(additionalModes, "mofed")
+	}
+
+	for _, mode := range additionalModes {
+		lib, err := nvcdi.New(
+			nvcdi.WithInfoLib(c.infolib),
+			nvcdi.WithLogger(c.logger),
+			nvcdi.WithNVIDIACDIHookPath(c.nvidiaCTKPath),
+			nvcdi.WithDriverRoot(c.driverRoot),
+			nvcdi.WithDevRoot(c.devRoot),
+			nvcdi.WithVendor(c.vendor),
+			nvcdi.WithMode(mode),
+		)
+		if err != nil {
+			return nil, fmt.Errorf("failed to create nvcdi library: %w", err)
+		}
+		c.cdilibs[mode] = lib
+	}
+
+	return c, nil
+}
+
+// CreateSpecFile generates, transforms and saves one CDI spec file per configured class under cdiRoot.
+func (cdi *cdiHandler) CreateSpecFile() error {
+	for class, cdilib := range cdi.cdilibs {
+		cdi.logger.Infof("Generating CDI spec for resource: %s/%s", cdi.vendor, class)
+		// NVML is only initialised for the gpu class; the deferred Shutdown runs when CreateSpecFile returns.
+		if class == "gpu" {
+			ret := cdi.nvmllib.Init()
+			if ret != nvml.SUCCESS {
+				return fmt.Errorf("failed to initialize NVML: %v", ret)
+			}
+			defer func() {
+				_ = cdi.nvmllib.Shutdown()
+			}()
+		}
+
+		spec, err := cdilib.GetSpec()
+		if err != nil {
+			return fmt.Errorf("failed to get CDI spec: %w", err)
+		}
+
+		// TODO: Once the NewDriverTransformer is merged in container-toolkit we can instantiate it directly.
+		transformer := cdi.getRootTransformer()
+		if err := transformer.Transform(spec.Raw()); err != nil {
+			return fmt.Errorf("failed to transform driver root in CDI spec: %w", err)
+		}
+
+		specName, err := cdiapi.GenerateNameForSpec(spec.Raw())
+		if err != nil {
+			return fmt.Errorf("failed to generate spec name: %w", err)
+		}
+
+		err = spec.Save(filepath.Join(cdiRoot, specName+".json"))
+		if err != nil {
+			return fmt.Errorf("failed to save CDI spec: %w", err)
+		}
+	}
+
+	return nil
+}
+
+// getRootTransformer returns the transformer that rewrites the driver root to
+// the target driver root and, when a distinct dev root is set, the dev root too.
+func (cdi *cdiHandler) getRootTransformer() transform.Transformer {
+	forDriver := transformroot.New(
+		transformroot.WithRoot(cdi.driverRoot),
+		transformroot.WithTargetRoot(cdi.targetDriverRoot),
+		transformroot.WithRelativeTo("host"),
+	)
+	if cdi.devRoot == "" || cdi.devRoot == cdi.driverRoot {
+		return forDriver
+	}
+
+	// withDev makes sure the cleaned path p ends in "/dev", appending it if it is missing.
+	withDev := func(p string) string {
+		return filepath.Join(strings.TrimSuffix(filepath.Clean(p), "/dev"), "/dev")
+	}
+	forDev := transformroot.New(
+		transformroot.WithRoot(withDev(cdi.devRoot)),
+		transformroot.WithTargetRoot(withDev(cdi.targetDevRoot)),
+		transformroot.WithRelativeTo("host"),
+	)
+	return transform.Merge(forDriver, forDev)
+}
+
+// QualifiedName constructs a CDI qualified device name from the handler's vendor and the given class and id.
+// Note: This assumes that the specified id matches the device name returned by the naming strategy.
+func (cdi *cdiHandler) QualifiedName(class, id string) string {
+	return cdiparser.QualifiedName(cdi.vendor, class, id)
+}
diff --git a/pkg/nvidia-plugin/pkg/cdi/imex.go b/pkg/nvidia-plugin/pkg/cdi/imex.go
new file mode 100644
index 000000000..38aaa0a41
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/cdi/imex.go
@@ -0,0 +1,63 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package cdi
+
+import (
+	"tags.cncf.io/container-device-interface/specs-go"
+
+	"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
+
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/imex"
+)
+
+type imexChannelCDILib struct {
+	vendor       string        // CDI vendor passed to spec.WithVendor in GetSpec
+	imexChannels imex.Channels // channels that GetSpec turns into CDI devices
+}
+
+// newImexChannelSpecGenerator returns a spec generator for the handler's IMEX
+// channels, carrying over the handler's vendor.
+func (cdi *cdiHandler) newImexChannelSpecGenerator() cdiSpecGenerator {
+	return &imexChannelCDILib{
+		vendor:       cdi.vendor,
+		imexChannels: cdi.imexChannels,
+	}
+}
+
+// GetSpec returns the CDI specs for IMEX channels.
+// Each channel becomes one CDI device, named by the channel ID, whose only
+// container edit is a device node built from the channel's Path and HostPath.
+func (l *imexChannelCDILib) GetSpec() (spec.Interface, error) {
+	var devices []specs.Device
+	for _, ch := range l.imexChannels {
+		node := &specs.DeviceNode{
+			Path:     ch.Path,
+			HostPath: ch.HostPath,
+		}
+		devices = append(devices, specs.Device{
+			Name: ch.ID,
+			ContainerEdits: specs.ContainerEdits{
+				DeviceNodes: []*specs.DeviceNode{node},
+			},
+		})
+	}
+	return spec.New(
+		spec.WithDeviceSpecs(devices),
+		spec.WithVendor(l.vendor),
+		spec.WithClass("imex-channel"),
+	)
+}
diff --git a/pkg/nvidia-plugin/pkg/cdi/null.go b/pkg/nvidia-plugin/pkg/cdi/null.go
new file mode 100644
index 000000000..16ccead0a
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/cdi/null.go
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cdi
+
+import (
+	"k8s.io/klog/v2"
+)
+
+type null struct{} // no-op implementation of Interface, used when CDI specs are not required
+
+var _ Interface = &null{}
+
+// NewNullHandler returns a no-op implementation of the 'cdi' interface for
+// callers that do not require CDI specs.
+func NewNullHandler() Interface {
+	return new(null)
+}
+
+// CreateSpecFile does nothing for the null handler and always returns nil.
+func (*null) CreateSpecFile() error {
+	return nil
+}
+
+// QualifiedName always returns the empty string for the null handler. An error
+// message is logged indicating this should never be called for the null handler.
+func (*null) QualifiedName(string, string) string {
+	klog.Error("cannot return a qualified CDI device name with the null CDI handler")
+	return ""
+}
diff --git a/pkg/nvidia-plugin/pkg/cdi/options.go b/pkg/nvidia-plugin/pkg/cdi/options.go
new file mode 100644
index 000000000..392e744a6
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/cdi/options.go
@@ -0,0 +1,102 @@
+/**
+# Copyright (c) NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package cdi
+
+import (
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/imex"
+)
+
+// Option defines a function for passing options to the New() call; New applies each one to the cdiHandler.
+type Option func(*cdiHandler)
+
+// WithDeviceListStrategies provides an Option to set the device list strategies used by the 'cdi' interface
+func WithDeviceListStrategies(deviceListStrategies spec.DeviceListStrategies) Option {
+	return func(h *cdiHandler) {
+		h.deviceListStrategies = deviceListStrategies
+	}
+}
+
+// WithDriverRoot returns an Option that sets the driver root of the 'cdi' interface.
+func WithDriverRoot(root string) Option {
+	return func(h *cdiHandler) {
+		h.driverRoot = root
+	}
+}
+
+// WithDevRoot returns an Option that sets the dev root of the `cdi` interface.
+func WithDevRoot(root string) Option {
+	return func(h *cdiHandler) {
+		h.devRoot = root
+	}
+}
+
+// WithTargetDriverRoot returns an Option that sets the target (host) driver root of the 'cdi' interface
+func WithTargetDriverRoot(root string) Option {
+	return func(h *cdiHandler) {
+		h.targetDriverRoot = root
+	}
+}
+
+// WithTargetDevRoot returns an Option that sets the target (host) dev root of the 'cdi' interface
+func WithTargetDevRoot(root string) Option {
+	return func(h *cdiHandler) {
+		h.targetDevRoot = root
+	}
+}
+
+// WithNvidiaCTKPath returns an Option that sets the nvidia-ctk path of the 'cdi' interface
+func WithNvidiaCTKPath(path string) Option {
+	return func(h *cdiHandler) {
+		h.nvidiaCTKPath = path
+	}
+}
+
+// WithDeviceIDStrategy returns an Option that sets the device ID strategy of the 'cdi' interface
+func WithDeviceIDStrategy(strategy string) Option {
+	return func(h *cdiHandler) {
+		h.deviceIDStrategy = strategy
+	}
+}
+
+// WithVendor returns an Option that sets the vendor of the 'cdi' interface
+func WithVendor(vendor string) Option {
+	return func(h *cdiHandler) {
+		h.vendor = vendor
+	}
+}
+
+// WithGdsEnabled provides an option to set whether a GDS CDI spec should be generated
+func WithGdsEnabled(enabled bool) Option {
+	return func(h *cdiHandler) {
+		h.gdsEnabled = enabled
+	}
+}
+
+// WithMofedEnabled provides an option to set whether a MOFED CDI spec should be generated
+func WithMofedEnabled(enabled bool) Option {
+	return func(h *cdiHandler) {
+		h.mofedEnabled = enabled
+	}
+}
+
+// WithImexChannels returns an Option that sets the IMEX channels for which CDI specs should be generated.
+func WithImexChannels(imexChannels imex.Channels) Option {
+	return func(h *cdiHandler) {
+		h.imexChannels = imexChannels
+	}
+}
diff --git a/pkg/nvidia-plugin/pkg/cuda/api.go b/pkg/nvidia-plugin/pkg/cuda/api.go
new file mode 100644
index 000000000..e43ce4a2b
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/cuda/api.go
@@ -0,0 +1,119 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package cuda
+
+import (
+	"github.com/NVIDIA/go-nvml/pkg/dl"
+)
+
+const (
+	libraryName      = "libcuda.so.1"                // shared object opened by Init
+	libraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL // flags passed to dl.New in Init
+)
+
+// cuda stores a reference to the cuda dynamic library; it is set by Init and closed by Shutdown.
+var cuda *dl.DynamicLibrary
+
+// Init opens libcuda.so.1, checks that the cuInit symbol resolves and then
+// calls cuInit(0). ERROR_UNKNOWN is returned if the open or the lookup fails.
+func Init() Result {
+	handle := dl.New(libraryName, libraryLoadFlags)
+	if openErr := handle.Open(); openErr != nil {
+		return ERROR_UNKNOWN
+	}
+	// Keep the handle so that Shutdown can close the library again.
+	cuda = handle
+	if lookupErr := handle.Lookup("cuInit"); lookupErr != nil {
+		return ERROR_UNKNOWN
+	}
+	return cuInit(0)
+}
+
+// Shutdown ensures that the CUDA library is unloaded.
+// It returns SUCCESS without doing anything when no library handle has been stored.
+func Shutdown() Result {
+	if cuda != nil {
+		if err := cuda.Close(); err != nil {
+			return ERROR_UNKNOWN
+		}
+	}
+	return SUCCESS
+}
+
+// DriverGetVersion returns the driver version as an int.
+// The accompanying Result is whatever cuDriverGetVersion reported.
+func DriverGetVersion() (int, Result) {
+	var raw int32
+	ret := cuDriverGetVersion(&raw)
+	return int(raw), ret
+}
+
+// DeviceGet returns the device with the specified index.
+func DeviceGet(index int) (Device, Result) {
+	var dev Device
+	// The int -> int32 narrowing is deliberately unchecked (see the nolint note).
+	//nolint:gosec  // Since index is internal-only, we ignore possible overflow errors here.
+	ret := cuDeviceGet(&dev, int32(index))
+	return dev, ret
+}
+
+// DeviceGetAttribute returns the specified attribute for the specified device, widened to an int.
+func DeviceGetAttribute(attribute DeviceAttribute, device Device) (int, Result) {
+	var raw int32
+	ret := cuDeviceGetAttribute(&raw, attribute, device)
+	return int(raw), ret
+}
+
+// DeviceGetCount returns the number of CUDA-capable devices available, as reported by cuDeviceGetCount.
+func DeviceGetCount() (int, Result) {
+	var raw int32
+	ret := cuDeviceGetCount(&raw)
+	return int(raw), ret
+}
+
+// GetAttribute is the method form of DeviceGetAttribute, called with the receiver as the device.
+func (device Device) GetAttribute(attribute DeviceAttribute) (int, Result) {
+	return DeviceGetAttribute(attribute, device)
+}
+
+// DeviceGetName returns the name of the specified device.
+// The name is read into a 96-byte buffer and cut at the first NUL byte.
+func DeviceGetName(device Device) (string, Result) {
+	// bufLen avoids shadowing the builtin len.
+	bufLen := int32(96)
+	buf := make([]byte, bufLen)
+	ret := cuDeviceGetName(&buf[0], bufLen, device)
+	return string(buf[:clen(buf)]), ret
+}
+
+// GetName is the method form of DeviceGetName, called with the receiver as the device.
+func (device Device) GetName() (string, Result) {
+	return DeviceGetName(device)
+}
+
+// DeviceTotalMem returns the total memory for the specified device
+// The value is whatever cuDeviceTotalMem wrote; it is returned alongside the Result unchanged.
+func DeviceTotalMem(device Device) (uint64, Result) {
+	var total uint64
+	ret := cuDeviceTotalMem(&total, device)
+	return total, ret
+}
+
+// TotalMem is the method form of DeviceTotalMem, called with the receiver as the device.
+func (device Device) TotalMem() (uint64, Result) {
+	return DeviceTotalMem(device)
+}
diff --git a/pkg/nvidia-plugin/pkg/cuda/cgo_helpers.go b/pkg/nvidia-plugin/pkg/cuda/cgo_helpers.go
new file mode 100644
index 000000000..24fe6b8e7
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/cuda/cgo_helpers.go
@@ -0,0 +1,27 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package cuda
+
+// clen returns the index of the first NUL byte in n, or len(n) when n contains no NUL byte.
+func clen(n []byte) int {
+	for idx, b := range n {
+		if b == 0 {
+			return idx
+		}
+	}
+	return len(n)
+}
diff --git a/pkg/nvidia-plugin/pkg/cuda/consts.go b/pkg/nvidia-plugin/pkg/cuda/consts.go
new file mode 100644
index 000000000..f392ebf11
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/cuda/consts.go
@@ -0,0 +1,95 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package cuda
+
+// Result represents the CUresult return type; the cgo wrappers convert every CUresult to a Result.
+type Result int32
+
+const (
+	SUCCESS                              Result = 0
+	ERROR_INVALID_VALUE                  Result = 1
+	ERROR_OUT_OF_MEMORY                  Result = 2
+	ERROR_NOT_INITIALIZED                Result = 3
+	ERROR_DEINITIALIZED                  Result = 4
+	ERROR_PROFILER_DISABLED              Result = 5
+	ERROR_PROFILER_NOT_INITIALIZED       Result = 6
+	ERROR_PROFILER_ALREADY_STARTED       Result = 7
+	ERROR_PROFILER_ALREADY_STOPPED       Result = 8
+	ERROR_NO_DEVICE                      Result = 100
+	ERROR_INVALID_DEVICE                 Result = 101
+	ERROR_INVALID_IMAGE                  Result = 200
+	ERROR_INVALID_CONTEXT                Result = 201
+	ERROR_CONTEXT_ALREADY_CURRENT        Result = 202
+	ERROR_MAP_FAILED                     Result = 205
+	ERROR_UNMAP_FAILED                   Result = 206
+	ERROR_ARRAY_IS_MAPPED                Result = 207
+	ERROR_ALREADY_MAPPED                 Result = 208
+	ERROR_NO_BINARY_FOR_GPU              Result = 209
+	ERROR_ALREADY_ACQUIRED               Result = 210
+	ERROR_NOT_MAPPED                     Result = 211
+	ERROR_NOT_MAPPED_AS_ARRAY            Result = 212
+	ERROR_NOT_MAPPED_AS_POINTER          Result = 213
+	ERROR_ECC_UNCORRECTABLE              Result = 214
+	ERROR_UNSUPPORTED_LIMIT              Result = 215
+	ERROR_CONTEXT_ALREADY_IN_USE         Result = 216
+	ERROR_PEER_ACCESS_UNSUPPORTED        Result = 217
+	ERROR_INVALID_PTX                    Result = 218
+	ERROR_INVALID_GRAPHICS_CONTEXT       Result = 219
+	ERROR_NVLINK_UNCORRECTABLE           Result = 220
+	ERROR_JIT_COMPILER_NOT_FOUND         Result = 221
+	ERROR_INVALID_SOURCE                 Result = 300
+	ERROR_FILE_NOT_FOUND                 Result = 301
+	ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND Result = 302
+	ERROR_SHARED_OBJECT_INIT_FAILED      Result = 303
+	ERROR_OPERATING_SYSTEM               Result = 304
+	ERROR_INVALID_HANDLE                 Result = 400
+	ERROR_NOT_FOUND                      Result = 500
+	ERROR_NOT_READY                      Result = 600
+	ERROR_ILLEGAL_ADDRESS                Result = 700
+	ERROR_LAUNCH_OUT_OF_RESOURCES        Result = 701
+	ERROR_LAUNCH_TIMEOUT                 Result = 702
+	ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  Result = 703
+	ERROR_PEER_ACCESS_ALREADY_ENABLED    Result = 704
+	ERROR_PEER_ACCESS_NOT_ENABLED        Result = 705
+	ERROR_PRIMARY_CONTEXT_ACTIVE         Result = 708
+	ERROR_CONTEXT_IS_DESTROYED           Result = 709
+	ERROR_ASSERT                         Result = 710
+	ERROR_TOO_MANY_PEERS                 Result = 711
+	ERROR_HOST_MEMORY_ALREADY_REGISTERED Result = 712
+	ERROR_HOST_MEMORY_NOT_REGISTERED     Result = 713
+	ERROR_HARDWARE_STACK_ERROR           Result = 714
+	ERROR_ILLEGAL_INSTRUCTION            Result = 715
+	ERROR_MISALIGNED_ADDRESS             Result = 716
+	ERROR_INVALID_ADDRESS_SPACE          Result = 717
+	ERROR_INVALID_PC                     Result = 718
+	ERROR_LAUNCH_FAILED                  Result = 719
+	ERROR_COOPERATIVE_LAUNCH_TOO_LARGE   Result = 720
+	ERROR_NOT_PERMITTED                  Result = 800
+	ERROR_NOT_SUPPORTED                  Result = 801
+	ERROR_UNKNOWN                        Result = 999 // must equal CUDA_ERROR_UNKNOWN (999) in the CUresult enum
+)
+
+// DeviceAttribute represents the CUdevice_attribute type
+type DeviceAttribute int32
+
+const (
+	COMPUTE_CAPABILITY_MAJOR DeviceAttribute = 75 // CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR
+	COMPUTE_CAPABILITY_MINOR DeviceAttribute = 76 // CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR
+)
+
+// Device represents a CUDA device handle
+type Device int32
diff --git a/pkg/nvidia-plugin/pkg/cuda/cuda.go b/pkg/nvidia-plugin/pkg/cuda/cuda.go
new file mode 100644
index 000000000..f44004923
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/cuda/cuda.go
@@ -0,0 +1,176 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package cuda
+
+import (
+	"unsafe"
+)
+
+/*
+#cgo linux LDFLAGS: -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
+#cgo darwin LDFLAGS: -Wl,-undefined,dynamic_lookup
+
+#ifdef _WIN32
+#define CUDAAPI __stdcall
+#else
+#define CUDAAPI
+#endif
+
+typedef int CUdevice;
+
+typedef enum CUdevice_attribute_enum {
+    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
+    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76
+} CUdevice_attribute;
+
+typedef enum cudaError_enum {
+    CUDA_SUCCESS                              = 0,
+    CUDA_ERROR_INVALID_VALUE                  = 1,
+    CUDA_ERROR_OUT_OF_MEMORY                  = 2,
+    CUDA_ERROR_NOT_INITIALIZED                = 3,
+    CUDA_ERROR_DEINITIALIZED                  = 4,
+    CUDA_ERROR_PROFILER_DISABLED              = 5,
+    CUDA_ERROR_PROFILER_NOT_INITIALIZED       = 6,
+    CUDA_ERROR_PROFILER_ALREADY_STARTED       = 7,
+    CUDA_ERROR_PROFILER_ALREADY_STOPPED       = 8,
+    CUDA_ERROR_NO_DEVICE                      = 100,
+    CUDA_ERROR_INVALID_DEVICE                 = 101,
+    CUDA_ERROR_INVALID_IMAGE                  = 200,
+    CUDA_ERROR_INVALID_CONTEXT                = 201,
+    CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202,
+    CUDA_ERROR_MAP_FAILED                     = 205,
+    CUDA_ERROR_UNMAP_FAILED                   = 206,
+    CUDA_ERROR_ARRAY_IS_MAPPED                = 207,
+    CUDA_ERROR_ALREADY_MAPPED                 = 208,
+    CUDA_ERROR_NO_BINARY_FOR_GPU              = 209,
+    CUDA_ERROR_ALREADY_ACQUIRED               = 210,
+    CUDA_ERROR_NOT_MAPPED                     = 211,
+    CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212,
+    CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213,
+    CUDA_ERROR_ECC_UNCORRECTABLE              = 214,
+    CUDA_ERROR_UNSUPPORTED_LIMIT              = 215,
+    CUDA_ERROR_CONTEXT_ALREADY_IN_USE         = 216,
+    CUDA_ERROR_PEER_ACCESS_UNSUPPORTED        = 217,
+    CUDA_ERROR_INVALID_PTX                    = 218,
+    CUDA_ERROR_INVALID_GRAPHICS_CONTEXT       = 219,
+    CUDA_ERROR_NVLINK_UNCORRECTABLE           = 220,
+    CUDA_ERROR_JIT_COMPILER_NOT_FOUND         = 221,
+    CUDA_ERROR_INVALID_SOURCE                 = 300,
+    CUDA_ERROR_FILE_NOT_FOUND                 = 301,
+    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
+    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,
+    CUDA_ERROR_OPERATING_SYSTEM               = 304,
+    CUDA_ERROR_INVALID_HANDLE                 = 400,
+    CUDA_ERROR_NOT_FOUND                      = 500,
+    CUDA_ERROR_NOT_READY                      = 600,
+    CUDA_ERROR_ILLEGAL_ADDRESS                = 700,
+    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,
+    CUDA_ERROR_LAUNCH_TIMEOUT                 = 702,
+    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,
+    CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED    = 704,
+    CUDA_ERROR_PEER_ACCESS_NOT_ENABLED        = 705,
+    CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE         = 708,
+    CUDA_ERROR_CONTEXT_IS_DESTROYED           = 709,
+    CUDA_ERROR_ASSERT                         = 710,
+    CUDA_ERROR_TOO_MANY_PEERS                 = 711,
+    CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
+    CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED     = 713,
+    CUDA_ERROR_HARDWARE_STACK_ERROR           = 714,
+    CUDA_ERROR_ILLEGAL_INSTRUCTION            = 715,
+    CUDA_ERROR_MISALIGNED_ADDRESS             = 716,
+    CUDA_ERROR_INVALID_ADDRESS_SPACE          = 717,
+    CUDA_ERROR_INVALID_PC                     = 718,
+    CUDA_ERROR_LAUNCH_FAILED                  = 719,
+    CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE   = 720,
+    CUDA_ERROR_NOT_PERMITTED                  = 800,
+    CUDA_ERROR_NOT_SUPPORTED                  = 801,
+    CUDA_ERROR_UNKNOWN                        = 999
+} CUresult;
+
+CUresult CUDAAPI cuInit(unsigned int Flags);
+CUresult CUDAAPI cuDriverGetVersion(int *driverVersion);
+CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);
+CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
+CUresult CUDAAPI cuDeviceGetCount(int *count);
+CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev);
+CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev);
+*/
+import "C"
+
+// cuInit function as declared in cuda.h
+// The Go flags value is converted to a C unsigned int and the CUresult that
+// comes back is converted to a Result.
+func cuInit(flags uint32) Result {
+	status := C.cuInit(C.uint(flags))
+	return Result(status)
+}
+
+// cuDeviceGet function as declared in cuda.h
+// device is reinterpreted as a *CUdevice so that the C call writes the handle
+// directly into the Go value.
+func cuDeviceGet(device *Device, index int32) Result {
+	out := (*C.CUdevice)(unsafe.Pointer(device))
+	ordinal := C.int(index)
+	status := C.cuDeviceGet(out, ordinal)
+	return Result(status)
+}
+
+// cuDeviceGetAttribute function as declared in cuda.h
+// The attribute value is written through value, which is reinterpreted as a C int pointer.
+func cuDeviceGetAttribute(value *int32, attribute DeviceAttribute, dev Device) Result {
+	status := C.cuDeviceGetAttribute(
+		(*C.int)(unsafe.Pointer(value)),
+		C.CUdevice_attribute(attribute),
+		C.CUdevice(dev),
+	)
+	return Result(status)
+}
+
+// cuDeviceGetCount function as declared in cuda.h
+// The device count is written through count, which is reinterpreted as a C int pointer.
+func cuDeviceGetCount(count *int32) Result {
+	out := (*C.int)(unsafe.Pointer(count))
+	status := C.cuDeviceGetCount(out)
+	return Result(status)
+}
+
+// cuDriverGetVersion function as declared in cuda.h
+// The driver version is written through version, which is converted to a C int pointer.
+func cuDriverGetVersion(version *int32) Result {
+	out := (*C.int)(version)
+	status := C.cuDriverGetVersion(out)
+	return Result(status)
+}
+
+// cuDeviceTotalMem function as declared in cuda.h
+// NOTE(review): bytes is reinterpreted as a *size_t; this presumably relies on
+// size_t being 64 bits wide on the target platform — confirm.
+func cuDeviceTotalMem(bytes *uint64, dev Device) Result {
+	out := (*C.size_t)(unsafe.Pointer(bytes))
+	status := C.cuDeviceTotalMem(out, C.CUdevice(dev))
+	return Result(status)
+}
+
+// cuDeviceGetName function as declared in cuda.h
+// name points at the first byte of the destination buffer and len is passed to
+// the C call as the buffer length.
+func cuDeviceGetName(name *byte, len int32, dev Device) Result {
+	dst := (*C.char)(unsafe.Pointer(name))
+	size := C.int(len)
+	status := C.cuDeviceGetName(dst, size, C.CUdevice(dev))
+	return Result(status)
+}
diff --git a/pkg/nvidia-plugin/pkg/cuda/device.go b/pkg/nvidia-plugin/pkg/cuda/device.go
new file mode 100644
index 000000000..ba8f71eb0
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/cuda/device.go
@@ -0,0 +1,17 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package cuda
diff --git a/pkg/nvidia-plugin/pkg/cuda/result.go b/pkg/nvidia-plugin/pkg/cuda/result.go
new file mode 100644
index 000000000..ae2e1a9e0
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/cuda/result.go
@@ -0,0 +1,178 @@
+/*
+* SPDX-License-Identifier: Apache-2.0
+*
+* The HAMi Contributors require contributions made to
+* this file be licensed under the Apache-2.0 license or a
+* compatible open source license.
+ */
+
+/*
+* Licensed to NVIDIA CORPORATION under one or more contributor
+* license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright
+* ownership. NVIDIA CORPORATION licenses this file to you under
+* the Apache License, Version 2.0 (the "License"); you may
+* not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+ */
+
+/*
+* Modifications Copyright The HAMi Authors. See
+* GitHub history for details.
+ */
+
+package cuda
+
+import (
+	"fmt"
+)
+
+// String formats r by calling the package-level errorStringFunc.
+func (r Result) String() string {
+	return errorStringFunc(r)
+}
+
+// Error returns r.String(), which lets a Result be used as an error value.
+func (r Result) Error() string {
+	return r.String()
+}
+
+var errorStringFunc = defaultErrorStringFunc // formatter invoked by Result.String
+
+var defaultErrorStringFunc = func(r Result) string { // maps each known Result constant to its CUDA_* name
+	switch r {
+	case SUCCESS:
+		return "CUDA_SUCCESS"
+	case ERROR_INVALID_VALUE:
+		return "CUDA_ERROR_INVALID_VALUE"
+	case ERROR_OUT_OF_MEMORY:
+		return "CUDA_ERROR_OUT_OF_MEMORY"
+	case ERROR_NOT_INITIALIZED:
+		return "CUDA_ERROR_NOT_INITIALIZED"
+	case ERROR_DEINITIALIZED:
+		return "CUDA_ERROR_DEINITIALIZED"
+	case ERROR_PROFILER_DISABLED:
+		return "CUDA_ERROR_PROFILER_DISABLED"
+	case ERROR_PROFILER_NOT_INITIALIZED:
+		return "CUDA_ERROR_PROFILER_NOT_INITIALIZED"
+	case ERROR_PROFILER_ALREADY_STARTED:
+		return "CUDA_ERROR_PROFILER_ALREADY_STARTED"
+	case ERROR_PROFILER_ALREADY_STOPPED:
+		return "CUDA_ERROR_PROFILER_ALREADY_STOPPED"
+	case ERROR_NO_DEVICE:
+		return "CUDA_ERROR_NO_DEVICE"
+	case ERROR_INVALID_DEVICE:
+		return "CUDA_ERROR_INVALID_DEVICE"
+	case ERROR_INVALID_IMAGE:
+		return "CUDA_ERROR_INVALID_IMAGE"
+	case ERROR_INVALID_CONTEXT:
+		return "CUDA_ERROR_INVALID_CONTEXT"
+	case ERROR_CONTEXT_ALREADY_CURRENT:
+		return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT"
+	case ERROR_MAP_FAILED:
+		return "CUDA_ERROR_MAP_FAILED"
+	case ERROR_UNMAP_FAILED:
+		return "CUDA_ERROR_UNMAP_FAILED"
+	case ERROR_ARRAY_IS_MAPPED:
+		return "CUDA_ERROR_ARRAY_IS_MAPPED"
+	case ERROR_ALREADY_MAPPED:
+		return "CUDA_ERROR_ALREADY_MAPPED"
+	case ERROR_NO_BINARY_FOR_GPU:
+		return "CUDA_ERROR_NO_BINARY_FOR_GPU"
+	case ERROR_ALREADY_ACQUIRED:
+		return "CUDA_ERROR_ALREADY_ACQUIRED"
+	case ERROR_NOT_MAPPED:
+		return "CUDA_ERROR_NOT_MAPPED"
+	case ERROR_NOT_MAPPED_AS_ARRAY:
+		return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY"
+	case ERROR_NOT_MAPPED_AS_POINTER:
+		return "CUDA_ERROR_NOT_MAPPED_AS_POINTER"
+	case ERROR_ECC_UNCORRECTABLE:
+		return "CUDA_ERROR_ECC_UNCORRECTABLE"
+	case ERROR_UNSUPPORTED_LIMIT:
+		return "CUDA_ERROR_UNSUPPORTED_LIMIT"
+	case ERROR_CONTEXT_ALREADY_IN_USE:
+		return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE"
+	case ERROR_PEER_ACCESS_UNSUPPORTED:
+		return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED"
+	case ERROR_INVALID_PTX:
+		return "CUDA_ERROR_INVALID_PTX"
+	case ERROR_INVALID_GRAPHICS_CONTEXT:
+		return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT"
+	case ERROR_NVLINK_UNCORRECTABLE:
+		return "CUDA_ERROR_NVLINK_UNCORRECTABLE"
+	case ERROR_JIT_COMPILER_NOT_FOUND:
+		return "CUDA_ERROR_JIT_COMPILER_NOT_FOUND"
+	case ERROR_INVALID_SOURCE:
+		return "CUDA_ERROR_INVALID_SOURCE"
+	case ERROR_FILE_NOT_FOUND:
+		return "CUDA_ERROR_FILE_NOT_FOUND"
+	case ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
+		return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"
+	case ERROR_SHARED_OBJECT_INIT_FAILED:
+		return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"
+	case ERROR_OPERATING_SYSTEM:
+		return "CUDA_ERROR_OPERATING_SYSTEM"
+	case ERROR_INVALID_HANDLE:
+		return "CUDA_ERROR_INVALID_HANDLE"
+	case ERROR_NOT_FOUND:
+		return "CUDA_ERROR_NOT_FOUND"
+	case ERROR_NOT_READY:
+		return "CUDA_ERROR_NOT_READY"
+	case ERROR_ILLEGAL_ADDRESS:
+		return "CUDA_ERROR_ILLEGAL_ADDRESS"
+	case ERROR_LAUNCH_OUT_OF_RESOURCES:
+		return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"
+	case ERROR_LAUNCH_TIMEOUT:
+		return "CUDA_ERROR_LAUNCH_TIMEOUT"
+	case ERROR_LAUNCH_INCOMPATIBLE_TEXTURING:
+		return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING"
+	case ERROR_PEER_ACCESS_ALREADY_ENABLED:
+		return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"
+	case ERROR_PEER_ACCESS_NOT_ENABLED:
+		return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"
+	case ERROR_PRIMARY_CONTEXT_ACTIVE:
+		return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"
+	case ERROR_CONTEXT_IS_DESTROYED:
+		return "CUDA_ERROR_CONTEXT_IS_DESTROYED"
+	case ERROR_ASSERT:
+		return "CUDA_ERROR_ASSERT"
+	case ERROR_TOO_MANY_PEERS:
+		return "CUDA_ERROR_TOO_MANY_PEERS"
+	case ERROR_HOST_MEMORY_ALREADY_REGISTERED:
+		return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"
+	case ERROR_HOST_MEMORY_NOT_REGISTERED:
+		return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"
+	case ERROR_HARDWARE_STACK_ERROR:
+		return "CUDA_ERROR_HARDWARE_STACK_ERROR"
+	case ERROR_ILLEGAL_INSTRUCTION:
+		return "CUDA_ERROR_ILLEGAL_INSTRUCTION"
+	case ERROR_MISALIGNED_ADDRESS:
+		return "CUDA_ERROR_MISALIGNED_ADDRESS"
+	case ERROR_INVALID_ADDRESS_SPACE:
+		return "CUDA_ERROR_INVALID_ADDRESS_SPACE"
+	case ERROR_INVALID_PC:
+		return "CUDA_ERROR_INVALID_PC"
+	case ERROR_LAUNCH_FAILED:
+		return "CUDA_ERROR_LAUNCH_FAILED"
+	case ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
+		return "CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE"
+	case ERROR_NOT_PERMITTED:
+		return "CUDA_ERROR_NOT_PERMITTED"
+	case ERROR_NOT_SUPPORTED:
+		return "CUDA_ERROR_NOT_SUPPORTED"
+	case ERROR_UNKNOWN:
+		return "CUDA_ERROR_UNKNOWN"
+	default: // value not listed above; report it numerically
+		return fmt.Sprintf("Unknown return value: %d", r)
+	}
+}
diff --git a/pkg/nvidia-plugin/pkg/dependencies/dependencies.go b/pkg/nvidia-plugin/pkg/dependencies/dependencies.go
new file mode 100644
index 000000000..f793fea9f
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/dependencies/dependencies.go
@@ -0,0 +1,7 @@
+//go:build dependencies
+// +build dependencies
+
+// Package dependencies records dependencies. It cannot actually be compiled.
+package dependencies
+
+import _ "github.com/NVIDIA/go-gpuallocator/gpuallocator"
diff --git a/pkg/nvidia-plugin/pkg/flags/kubeclient.go b/pkg/nvidia-plugin/pkg/flags/kubeclient.go
new file mode 100644
index 000000000..d0d105753
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/flags/kubeclient.go
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2023 The Kubernetes Authors.
+ * Copyright 2024 NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package flags
+
+import (
+	"fmt"
+
+	"github.com/urfave/cli/v2"
+
+	coreclientset "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
+	"k8s.io/client-go/tools/clientcmd"
+	nfdclientset "sigs.k8s.io/node-feature-discovery/pkg/generated/clientset/versioned"
+)
+
+type KubeClientConfig struct {
+	KubeConfig   string  // kubeconfig path; empty selects the in-cluster configuration
+	KubeAPIQPS   float64 // copied to rest.Config.QPS (narrowed to float32)
+	KubeAPIBurst int     // copied to rest.Config.Burst
+}
+
+type ClientSets struct {
+	Core coreclientset.Interface // built with coreclientset.NewForConfig by NewClientSets
+	NFD  nfdclientset.Interface  // built with nfdclientset.NewForConfig by NewClientSets
+}
+
+// Flags returns the kubeconfig, QPS and burst CLI flags, each bound to the matching field of k.
+func (k *KubeClientConfig) Flags() []cli.Flag {
+	const category = "Kubernetes client:"
+	return []cli.Flag{
+		&cli.StringFlag{
+			Category:    category,
+			Name:        "kubeconfig",
+			Usage:       "Absolute path to the `KUBECONFIG` file. Either this flag or the KUBECONFIG env variable need to be set if the driver is being run out of cluster.",
+			Destination: &k.KubeConfig,
+			EnvVars:     []string{"KUBECONFIG"},
+		},
+		&cli.Float64Flag{
+			Category:    category,
+			Name:        "kube-api-qps",
+			Usage:       "`QPS` to use while communicating with the Kubernetes apiserver.",
+			Value:       5,
+			Destination: &k.KubeAPIQPS,
+			EnvVars:     []string{"KUBE_API_QPS"},
+		},
+		&cli.IntFlag{
+			Category:    category,
+			Name:        "kube-api-burst",
+			Usage:       "`Burst` to use while communicating with the Kubernetes apiserver.",
+			Value:       10,
+			Destination: &k.KubeAPIBurst,
+			EnvVars:     []string{"KUBE_API_BURST"},
+		},
+	}
+}
+
+// NewClientSetConfig loads the in-cluster rest.Config when KubeConfig is empty
+// and the KubeConfig file otherwise, then applies KubeAPIQPS and KubeAPIBurst.
+func (k *KubeClientConfig) NewClientSetConfig() (*rest.Config, error) {
+	var (
+		restConfig *rest.Config
+		err        error
+	)
+	switch k.KubeConfig {
+	case "":
+		if restConfig, err = rest.InClusterConfig(); err != nil {
+			return nil, fmt.Errorf("create in-cluster client configuration: %w", err)
+		}
+	default:
+		if restConfig, err = clientcmd.BuildConfigFromFlags("", k.KubeConfig); err != nil {
+			return nil, fmt.Errorf("create out-of-cluster client configuration: %w", err)
+		}
+	}
+
+	restConfig.QPS, restConfig.Burst = float32(k.KubeAPIQPS), k.KubeAPIBurst
+	return restConfig, nil
+}
+
+// NewClientSets creates the core Kubernetes and node-feature-discovery
+// clientsets, both from the single rest.Config returned by
+// NewClientSetConfig. On any failure the zero ClientSets is returned.
+func (k *KubeClientConfig) NewClientSets() (ClientSets, error) {
+	var none ClientSets
+
+	restConfig, err := k.NewClientSetConfig()
+	if err != nil {
+		return none, fmt.Errorf("create client configuration: %w", err)
+	}
+	core, err := coreclientset.NewForConfig(restConfig)
+	if err != nil {
+		return none, fmt.Errorf("create core client: %w", err)
+	}
+	nfd, err := nfdclientset.NewForConfig(restConfig)
+	if err != nil {
+		return none, fmt.Errorf("create nfd client: %w", err)
+	}
+
+	return ClientSets{Core: core, NFD: nfd}, nil
+}
diff --git a/pkg/nvidia-plugin/pkg/flags/node.go b/pkg/nvidia-plugin/pkg/flags/node.go
new file mode 100644
index 000000000..8a38c98a8
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/flags/node.go
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2023 The Kubernetes Authors.
+ * Copyright 2024 NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package flags
+
+import (
+	"github.com/urfave/cli/v2"
+)
+
+type NodeConfig struct {
+	Name      string // destination of the node-name flag (env NODE_NAME)
+	Namespace string // destination of the namespace flag (env NAMESPACE); flag default is "default"
+}
+
+// Flags returns the namespace and node-name CLI flags bound to the fields of n.
+func (n *NodeConfig) Flags() []cli.Flag {
+	return []cli.Flag{
+		&cli.StringFlag{
+			Name:        "namespace",
+			Usage:       "The namespace used for the custom resources.",
+			Value:       "default",
+			Destination: &n.Namespace,
+			EnvVars:     []string{"NAMESPACE"},
+		},
+		&cli.StringFlag{
+			Name:        "node-name",
+			Usage:       "The name of the node to be worked on.",
+			Destination: &n.Name,
+			EnvVars:     []string{"NODE_NAME"},
+		},
+	}
+}
diff --git a/pkg/nvidia-plugin/pkg/imex/imex.go b/pkg/nvidia-plugin/pkg/imex/imex.go
new file mode 100644
index 000000000..5b46d7baa
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/imex/imex.go
@@ -0,0 +1,98 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package imex
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"k8s.io/klog/v2"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+)
+
+// Channels represents a set of IMEX channels.
+type Channels []*Channel
+
+// Channel represents an IMEX channel.
+type Channel struct {
+	ID       string // decimal channel ID as formatted by GetChannels
+	Path     string // /dev/nvidia-caps-imex-channels/channel<ID>
+	HostPath string // Path joined onto the devRoot passed to GetChannels
+}
+
+// GetChannels resolves every ID in config.Imex.ChannelIDs to a Channel and
+// keeps those whose device node exists. A missing channel is an error when
+// config.Imex.Required is set; otherwise it is logged and skipped.
+func GetChannels(config *spec.Config, devRoot string) (Channels, error) {
+	const channelDir = "/dev/nvidia-caps-imex-channels"
+
+	var selected Channels
+	for _, channelID := range config.Imex.ChannelIDs {
+		id := fmt.Sprintf("%d", channelID)
+		name := "channel" + id
+		devPath := filepath.Join(channelDir, name)
+		candidate := &Channel{ID: id, Path: devPath, HostPath: filepath.Join(devRoot, devPath)}
+		exists, err := candidate.exists()
+		switch {
+		case exists:
+			klog.Infof("Selecting IMEX channel %v", name)
+			selected = append(selected, candidate)
+		case config.Imex.Required:
+			return nil, errors.Join(err, fmt.Errorf("requested IMEX channel %v does not exist", name))
+		default:
+			klog.Warningf("Ignoring requested IMEX channel %v (%v)", name, err)
+		}
+	}
+	return selected, nil
+}
+
+// exists reports whether the channel's device node is present as a
+// character device at HostPath or, when it differs, at Path.
+// Both locations are checked since where the device node appears in the
+// container depends on how it is injected.
+// For example, if the host driver root is mounted at /driver-root the channel
+// device node would be available at /driver-root/dev even if it was not
+// injected into the container through any other mechanism.
+// For the case of management containers using CDI to inject device nodes, these
+// device nodes would exist at /dev in the container instead.
+// Stat failures other than not-exist, and paths that are not character
+// devices, are collected and returned with false when no location matches.
+func (c Channel) exists() (bool, error) {
+	candidates := []string{c.HostPath}
+	if c.Path != c.HostPath {
+		candidates = append(candidates, c.Path)
+	}
+
+	var errs error
+	for _, candidate := range candidates {
+		info, err := os.Stat(candidate)
+		switch {
+		case os.IsNotExist(err):
+			// Absent at this location; try the next one.
+		case err != nil:
+			errs = errors.Join(errs, err)
+		case info.Mode()&os.ModeCharDevice == 0:
+			errs = errors.Join(errs, fmt.Errorf("%v is not a character device", candidate))
+		default:
+			return true, nil
+		}
+	}
+	return false, errs
+}
diff --git a/pkg/nvidia-plugin/pkg/info/version.go b/pkg/nvidia-plugin/pkg/info/version.go
new file mode 100644
index 000000000..231523590
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/info/version.go
@@ -0,0 +1,48 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package info
+
+import "strings"
+
+// version defaults to "unknown" and is meant to be overridden at link time
+// with -X <import path of this package>.version=... (see the Makefile).
+var version = "unknown"
+
+// gitCommit is the commit hash of the build, populated by the Makefile.
+var gitCommit = ""
+
+// GetVersionParts returns the version components: the version itself,
+// followed by a "commit: <hash>" entry when gitCommit is non-empty.
+// A new slice is returned on every call.
+func GetVersionParts() []string {
+	if gitCommit == "" {
+		return []string{version}
+	}
+
+	return []string{version, "commit: " + gitCommit}
+}
+
+// GetVersionString joins the version parts and any extra entries with newlines.
+func GetVersionString(more ...string) string {
+	parts := GetVersionParts()
+	return strings.Join(append(parts, more...), "\n")
+}
+
+// GetVersion returns the bare version value, without the commit component.
+func GetVersion() string {
+	return version
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/empty.go b/pkg/nvidia-plugin/pkg/lm/empty.go
new file mode 100644
index 000000000..b2a75dd52
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/empty.go
@@ -0,0 +1,24 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+// empty is a Labeler that contributes no labels.
+type empty struct{}
+
+func (empty) Labels() (Labels, error) {
+	return nil, nil // no labels and no error
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/imex.go b/pkg/nvidia-plugin/pkg/lm/imex.go
new file mode 100644
index 000000000..2c692a9fa
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/imex.go
@@ -0,0 +1,182 @@
+/**
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"net"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+
+	"github.com/google/uuid"
+	"k8s.io/klog/v2"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/resource"
+)
+
+const (
+	// ImexNodesConfigFilePath is the path to the IMEX nodes config file, joined
+	// onto each search root. The file is read line by line as IP addresses.
+	ImexNodesConfigFilePath = "/etc/nvidia-imex/nodes_config.cfg"
+)
+
+// newImexLabeler returns the labeler for the first IMEX nodes config file under
+// the search roots that yields one; errors surface only if none does.
+func newImexLabeler(config *spec.Config, devices []resource.Device) (Labeler, error) {
+	var failures error
+	for _, root := range imexNodesConfigFilePathSearchRoots(config) {
+		candidate := filepath.Join(root, ImexNodesConfigFilePath)
+		labeler, err := imexLabelerForConfigFile(candidate, devices)
+		switch {
+		case err != nil:
+			failures = errors.Join(failures, err)
+		case labeler != nil:
+			klog.Infof("Using labeler for IMEX config %v", candidate)
+			return labeler, nil
+		}
+	}
+	if failures != nil {
+		return nil, failures
+	}
+	return empty{}, nil
+}
+
+// imexNodesConfigFilePathSearchRoots lists the directories under which the
+// IMEX nodes config file is looked up, in search order.
+func imexNodesConfigFilePathSearchRoots(config *spec.Config) []string {
+	// "/" and "/config" are always searched.
+	searchRoots := []string{"/", "/config"}
+
+	// A configured container driver root is searched last.
+	if config != nil && config.Flags.Plugin != nil && config.Flags.Plugin.ContainerDriverRoot != nil {
+		searchRoots = append(searchRoots, *config.Flags.Plugin.ContainerDriverRoot)
+	}
+	return searchRoots
+}
+
+// imexLabelerForConfigFile derives the gpu.clique and gpu.imex-domain labels.
+// It returns (nil, nil) if the file is absent or the IDs cannot be determined.
+func imexLabelerForConfigFile(configFilePath string, devices []resource.Device) (Labeler, error) {
+	imexConfigFile, err := os.Open(configFilePath)
+	if os.IsNotExist(err) {
+		return nil, nil
+	}
+	if err != nil {
+		return nil, fmt.Errorf("failed to open imex config file: %w", err)
+	}
+	defer imexConfigFile.Close()
+
+	clusterUUID, cliqueID, err := getFabricIDs(devices)
+	if err != nil {
+		return nil, err
+	}
+	if clusterUUID == "" || cliqueID == "" {
+		return nil, nil
+	}
+
+	imexDomainID, err := getImexDomainID(imexConfigFile)
+	if err != nil {
+		return nil, err
+	}
+	if imexDomainID == "" {
+		return nil, nil
+	}
+
+	return Labels{
+		"nvidia.com/gpu.clique":      strings.Join([]string{clusterUUID, cliqueID}, "."),
+		"nvidia.com/gpu.imex-domain": strings.Join([]string{imexDomainID, cliqueID}, "."),
+	}, nil
+}
+
+// getFabricIDs returns the cluster UUID and clique ID shared by all fabric-attached
+// devices, or empty strings when there are none or the devices disagree.
+func getFabricIDs(devices []resource.Device) (string, string, error) {
+	uniqueClusterUUIDs := make(map[string][]int)
+	uniqueCliqueIDs := make(map[string][]int)
+	for i, device := range devices {
+		isFabricAttached, err := device.IsFabricAttached()
+		if err != nil {
+			return "", "", fmt.Errorf("error checking imex capability: %w", err)
+		}
+		if !isFabricAttached {
+			continue
+		}
+
+		clusterUUID, cliqueID, err := device.GetFabricIDs()
+		if err != nil {
+			return "", "", fmt.Errorf("error getting fabric IDs: %w", err)
+		}
+
+		uniqueClusterUUIDs[clusterUUID] = append(uniqueClusterUUIDs[clusterUUID], i)
+		uniqueCliqueIDs[cliqueID] = append(uniqueCliqueIDs[cliqueID], i)
+	}
+
+	if len(uniqueClusterUUIDs) > 1 {
+		klog.Warningf("Cluster UUIDs are non-unique: %v", uniqueClusterUUIDs)
+		return "", "", nil
+	}
+	if len(uniqueCliqueIDs) > 1 {
+		klog.Warningf("Clique IDs are non-unique: %v", uniqueCliqueIDs)
+		return "", "", nil
+	}
+
+	for clusterUUID := range uniqueClusterUUIDs {
+		for cliqueID := range uniqueCliqueIDs {
+			return clusterUUID, cliqueID, nil
+		}
+	}
+	return "", "", nil
+}
+
+// getImexDomainID reads the imex config file and returns a unique identifier
+// based on the sorted list of IP addresses in the file. Blank lines are
+// skipped; an empty ID is returned when the file lists no addresses.
+func getImexDomainID(r io.Reader) (string, error) {
+	var ips []string
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		ip := strings.TrimSpace(scanner.Text())
+		if ip == "" {
+			continue
+		}
+		if net.ParseIP(ip) == nil {
+			return "", fmt.Errorf("invalid IP address in imex config file: %s", ip)
+		}
+		ips = append(ips, ip)
+	}
+
+	if err := scanner.Err(); err != nil {
+		return "", fmt.Errorf("failed to read imex config file: %w", err)
+	}
+	if len(ips) == 0 {
+		return "", nil
+	}
+
+	// Sort so the ID does not depend on the order of lines in the file.
+	sort.Strings(ips)
+	return generateContentUUID(strings.Join(ips, "\n")), nil
+}
+
+func generateContentUUID(seed string) string {
+	return uuid.NewSHA1(uuid.Nil, []byte(seed)).String() // SHA-1 name-based UUID of seed under uuid.Nil
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/imex_test.go b/pkg/nvidia-plugin/pkg/lm/imex_test.go
new file mode 100644
index 000000000..553976465
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/imex_test.go
@@ -0,0 +1,57 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// TestGerenerateDomainUUID pins generateContentUUID to known UUIDs for
+// newline-joined IP lists, guarding against changes to the derivation.
+func TestGerenerateDomainUUID(t *testing.T) {
+	for _, testCase := range []struct {
+		description string
+		ips         []string
+		expected    string
+	}{
+		{
+			description: "single IP",
+			ips:         []string{"10.130.3.24"},
+			expected:    "60ad7226-0130-54d0-b762-2a5385a3a26f",
+		},
+		{
+			description: "multiple IPs",
+			ips: []string{
+				"10.130.3.24",
+				"10.130.3.53",
+				"10.130.3.23",
+				"10.130.3.31",
+				"10.130.3.27",
+				"10.130.3.25",
+			},
+			expected: "8a7363e9-1003-5814-9354-175fdff19204",
+		},
+	} {
+		t.Run(testCase.description, func(t *testing.T) {
+			seed := strings.Join(testCase.ips, "\n")
+			require.Equal(t, testCase.expected, generateContentUUID(seed))
+		})
+	}
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/labeler.go b/pkg/nvidia-plugin/pkg/lm/labeler.go
new file mode 100644
index 000000000..d1f41341a
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/labeler.go
@@ -0,0 +1,45 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+import (
+	"fmt"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/resource"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/vgpu"
+)
+
+// Labeler is implemented by anything that can produce a set of labels.
+type Labeler interface {
+	Labels() (Labels, error)
+}
+
+// NewLabelers constructs the required labelers from the specified config:
+// the device labeler merged with the vGPU labeler, in that order.
+func NewLabelers(manager resource.Manager, vgpu vgpu.Interface, config *spec.Config) (Labeler, error) {
+	deviceLabeler, err := NewDeviceLabeler(manager, config)
+	if err != nil {
+		return nil, fmt.Errorf("error creating labeler: %w", err)
+	}
+
+	// The vGPU labeler is passed last to Merge.
+	return Merge(
+		deviceLabeler,
+		NewVGPULabeler(vgpu),
+	), nil
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/labels.go b/pkg/nvidia-plugin/pkg/lm/labels.go
new file mode 100644
index 000000000..8283b6a3a
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/labels.go
@@ -0,0 +1,25 @@
+/**
+# Copyright (c) NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+// Labels is a set of labels stored as a string-to-string map.
+type Labels map[string]string
+
+// Labels implements Labeler by returning the receiver itself and a nil error.
+func (labels Labels) Labels() (Labels, error) {
+	return labels, nil
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/list.go b/pkg/nvidia-plugin/pkg/lm/list.go
new file mode 100644
index 000000000..decf0ee98
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/list.go
@@ -0,0 +1,46 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+import "fmt"
+
+// list represents a list of labelers that itself implements the Labeler interface.
+type list []Labeler
+
+// Merge converts a set of labelers to a single composite labeler.
+// The labelers are not evaluated here; their labels are only combined
+// when Labels is called on the result.
+func Merge(labelers ...Labeler) Labeler {
+	return list(labelers)
+}
+
+// Labels returns the labels from a set of labelers. Labels later in the list
+// overwrite earlier labels. The first labeler error aborts the merge.
+func (labelers list) Labels() (Labels, error) {
+	allLabels := make(Labels)
+	for _, labeler := range labelers {
+		labels, err := labeler.Labels()
+		if err != nil {
+			return nil, fmt.Errorf("error generating labels: %w", err)
+		}
+		for k, v := range labels {
+			allLabels[k] = v
+		}
+	}
+
+	return allLabels, nil
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/machine-type.go b/pkg/nvidia-plugin/pkg/lm/machine-type.go
new file mode 100644
index 000000000..887c180b6
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/machine-type.go
@@ -0,0 +1,53 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	"k8s.io/klog/v2"
+)
+
+const (
+	machineTypeUnknown = "unknown" // used when no path is given or the machine type cannot be read
+)
+
+// newMachineTypeLabeler labels the node with the sanitised machine type read
+// from machineTypePath, falling back to "unknown" (with a warning) on error.
+func newMachineTypeLabeler(machineTypePath string) (Labeler, error) {
+	machineType := machineTypeUnknown
+	if detected, err := getMachineType(machineTypePath); err != nil {
+		klog.Warningf("Error getting machine type from %v: %v", machineTypePath, err)
+	} else {
+		machineType = detected
+	}
+
+	return Labels{"nvidia.com/gpu.machine": sanitise(machineType)}, nil
+}
+
+func getMachineType(path string) (string, error) {
+	if path == "" { // no machine type file configured
+		return machineTypeUnknown, nil
+	}
+	data, err := os.ReadFile(path)
+	if err != nil { // wrapped with %w so callers can match the underlying os error
+		return "", fmt.Errorf("could not open machine type file: %w", err)
+	}
+	return strings.TrimSpace(string(data)), nil // trim surrounding whitespace
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/mig-strategy.go b/pkg/nvidia-plugin/pkg/lm/mig-strategy.go
new file mode 100644
index 000000000..f77d40ea9
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/mig-strategy.go
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package lm
+
+import (
+	"fmt"
+
+	"k8s.io/klog/v2"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/mig"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/resource"
+)
+
+// MIG strategy values that newMigLabeler switches on (read from config.Flags.MigStrategy).
+const (
+	MigStrategyNone   = "none"   // no MIG device labels are generated
+	MigStrategySingle = "single" // MIG devices are labelled under the full GPU resource name
+	MigStrategyMixed  = "mixed"  // each MIG device name gets its own nvidia.com/mig-<name> resource
+)
+
+// migResource is used to track MIG devices for labelling under the single and mixed strategies.
+// This allows a particular resource name to be associated with a resource.Device and count.
+type migResource struct {
+	name   spec.ResourceName // resource name the labels are generated for
+	device resource.Device   // first MIG device seen with this device name
+	count  int               // number of MIG devices seen with this device name
+}
+
+// NewResourceLabeler returns the labeler describing the GPU resources on the node.
+// Full-GPU labels are always generated; unless the configured MIG strategy is "none",
+// the labels specific to that strategy are merged with them.
+// When the manager reports no devices an empty labeler is returned.
+func NewResourceLabeler(manager resource.Manager, config *spec.Config) (Labeler, error) {
+	gpus, err := manager.GetDevices()
+	if err != nil {
+		return nil, fmt.Errorf("error getting devices: %v", err)
+	}
+
+	if len(gpus) == 0 {
+		// Nothing to label without GPUs.
+		return empty{}, nil
+	}
+
+	// Labels for the full GPUs are needed regardless of the MIG strategy.
+	gpuLabels, err := newGPULabelers(manager, config)
+	if err != nil {
+		return nil, fmt.Errorf("failed to construct GPU labeler: %v", err)
+	}
+	if *config.Flags.MigStrategy == spec.MigStrategyNone {
+		// With mig-strategy=none the full-GPU labels are the complete result.
+		return gpuLabels, nil
+	}
+
+	// Any other strategy contributes MIG-specific labels as well.
+	migLabels, err := newMigLabeler(manager, config)
+	if err != nil {
+		return nil, fmt.Errorf("failed to construct MIG resource labeler: %v", err)
+	}
+
+	// Both sets are exposed through a single merged labeler.
+	combined := Merge(gpuLabels, migLabels)
+	return combined, nil
+}
+
+// MigDeviceCounts maintains a count of unique MIG device types across all GPUs on a node.
+type MigDeviceCounts map[string]int
+
+// newMigLabeler builds the MIG labeler selected by config.Flags.MigStrategy.
+// The strategy label is always merged with the strategy's device labels;
+// an unrecognised strategy is reported as an error.
+func newMigLabeler(manager resource.Manager, config *spec.Config) (Labeler, error) {
+	strategy := *config.Flags.MigStrategy
+
+	var deviceLabels Labeler
+	switch strategy {
+	case MigStrategyNone:
+		deviceLabels = empty{}
+	case MigStrategySingle:
+		single, err := newMigStrategySingleLabeler(manager, config)
+		if err != nil {
+			return nil, fmt.Errorf("failed to create labeler for mig-strategy=single: %v", err)
+		}
+		deviceLabels = single
+	case MigStrategyMixed:
+		mixed, err := newMigStrategyMixedLabeler(manager, config)
+		if err != nil {
+			return nil, fmt.Errorf("failed to create labeler for mig-strategy=mixed: %v", err)
+		}
+		deviceLabels = mixed
+	default:
+		return nil, fmt.Errorf("unknown strategy: %v", strategy)
+	}
+
+	// The strategy label accompanies whatever the strategy itself produced.
+	return Merge(migStrategyLabeler(strategy), deviceLabels), nil
+}
+
+// newGPULabelers creates the labelers for the physical GPUs on the node: one per device name, for MIG-enabled and for full GPUs.
+func newGPULabelers(manager resource.Manager, config *spec.Config) (Labeler, error) {
+	deviceInfo := mig.NewDeviceInfo(manager)
+
+	devicesByMigEnabled, err := deviceInfo.GetDevicesMap() // indexed below by the MIG-enabled flag (true/false)
+	if err != nil {
+		return nil, fmt.Errorf("error getting map of devices: %v", err)
+	}
+
+	if len(devicesByMigEnabled) == 0 {
+		return nil, fmt.Errorf("no GPU devices detected")
+	}
+
+	counts := make(map[string]int) // devices per name, MIG-enabled and full GPUs counted together
+	migEnabledDevices := make(map[string]resource.Device)
+	for _, device := range devicesByMigEnabled[true] {
+		name, err := device.GetName()
+		if err != nil {
+			return nil, fmt.Errorf("error getting device name: %v", err)
+		}
+		migEnabledDevices[name] = device // the last device seen with this name is kept
+		counts[name]++
+	}
+
+	fullGPUs := make(map[string]resource.Device)
+	for _, device := range devicesByMigEnabled[false] {
+		name, err := device.GetName()
+		if err != nil {
+			return nil, fmt.Errorf("error getting device name: %v", err)
+		}
+		fullGPUs[name] = device // the last device seen with this name is kept
+		counts[name]++
+	}
+
+	if len(counts) > 1 {
+		var names []string
+		for n := range counts {
+			names = append(names, n)
+		}
+		klog.Warningf("Multiple device types detected: %v", names)
+	}
+
+	var labelers list
+	// We construct labelers for the MIG-enabled resources.
+	// These do not include sharing information.
+	for name, migEnabledDevice := range migEnabledDevices {
+		// We generate a resource label with no sharing modifications
+		l, err := NewGPUResourceLabelerWithoutSharing(migEnabledDevice, counts[name])
+		if err != nil {
+			return nil, fmt.Errorf("failed to construct labeler: %v", err)
+		}
+
+		labelers = append(labelers, l)
+	}
+
+	// We construct labelers for the full GPUs.
+	// These override any resources with the same name that have MIG enabled.
+	for name, fullGPU := range fullGPUs {
+		l, err := NewGPUResourceLabeler(config, fullGPU, counts[name])
+		if err != nil {
+			return nil, fmt.Errorf("failed to construct labeler: %v", err)
+		}
+
+		labelers = append(labelers, l)
+	}
+
+	return labelers.Labels() // evaluated immediately; the resulting labels are returned as the Labeler
+}
+
+// newMigStrategySingleLabeler creates the MIG labeler for mig-strategy=single.
+// It returns an empty labeler when no device has MIG enabled, and the mig-strategy-invalid labels when
+// a MIG-enabled device is empty, MIG-disabled devices exist, or more than one MIG device type is present.
+func newMigStrategySingleLabeler(manager resource.Manager, config *spec.Config) (Labeler, error) {
+	deviceInfo := mig.NewDeviceInfo(manager)
+	migEnabledDevices, err := deviceInfo.GetDevicesWithMigEnabled()
+	if err != nil {
+		return nil, fmt.Errorf("unable to retrieve list of MIG-enabled devices: %v", err)
+	}
+	// No devices have migEnabled=true. This is equivalent to the `none` MIG strategy
+	if len(migEnabledDevices) == 0 {
+		return empty{}, nil
+	}
+
+	hasEmpty, err := deviceInfo.AnyMigEnabledDeviceIsEmpty()
+	if err != nil {
+		return nil, fmt.Errorf("failed to check for empty MIG-enabled devices: %v", err)
+	}
+	// If any migEnabled=true device is empty, we return the set of mig-strategy-invalid labels.
+	if hasEmpty {
+		return newInvalidMigStrategyLabeler(migEnabledDevices[0], "at least one MIG device is enabled but empty")
+	}
+
+	migDisabledDevices, err := deviceInfo.GetDevicesWithMigDisabled()
+	if err != nil {
+		return nil, fmt.Errorf("unable to retrieve list of non-MIG-enabled devices: %v", err)
+	}
+	// If we have a mix of mig-enabled and mig-disabled device we return the set of mig-strategy-invalid labels
+	if len(migDisabledDevices) != 0 {
+		return newInvalidMigStrategyLabeler(migEnabledDevices[0], "devices with MIG enabled and disabled detected")
+	}
+
+	migs, err := deviceInfo.GetAllMigDevices()
+	if err != nil {
+		return nil, fmt.Errorf("unable to retrieve list of MIG devices: %v", err)
+	}
+
+	// Count the MIG devices per name. The locals are named so that they do not shadow the mig and resource packages.
+	resources := make(map[string]migResource)
+	for _, migDevice := range migs {
+		name, err := migDevice.GetName()
+		if err != nil {
+			return nil, fmt.Errorf("unable to get MIG device name: %v", err)
+		}
+		entry, exists := resources[name]
+		// For the first occurrence we record the device reference and the resource name.
+		if !exists {
+			entry.device = migDevice
+			entry.name = fullGPUResourceName
+		}
+		entry.count++
+		resources[name] = entry
+	}
+
+	// Multiple resources mean that we have more than one MIG profile defined. Return the set of mig-strategy-invalid labels.
+	if len(resources) != 1 {
+		return newInvalidMigStrategyLabeler(migEnabledDevices[0], "more than one MIG device type present on node")
+	}
+
+	return newMIGDeviceLabelers(resources, config)
+}
+
+// newInvalidMigStrategyLabeler returns the labels flagging an invalid mig-strategy=single
+// configuration: a MIG/INVALID product label for the device's model with count, replicas
+// and memory set to zero and an empty sharing strategy. The reason is only logged.
+func newInvalidMigStrategyLabeler(device resource.Device, reason string) (Labeler, error) {
+	klog.Warningf("Invalid configuration detected for mig-strategy=single: %v", reason)
+
+	productName, err := device.GetName()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get device model: %v", err)
+	}
+
+	labeler := resourceLabeler{resourceName: "nvidia.com/gpu"}
+	invalid := labeler.productLabel(productName, "MIG", "INVALID")
+	// The resource-specific values are zeroed or blanked.
+	labeler.updateLabel(invalid, "count", 0)
+	labeler.updateLabel(invalid, "replicas", 0)
+	labeler.updateLabel(invalid, "sharing-strategy", "")
+	labeler.updateLabel(invalid, "memory", 0)
+
+	return invalid, nil
+}
+
+// newMigStrategyMixedLabeler creates the MIG labeler for mig-strategy=mixed.
+// Every distinct MIG device name is labelled under its own nvidia.com/mig-<name>
+// resource, together with the number of MIG devices carrying that name.
+func newMigStrategyMixedLabeler(manager resource.Manager, config *spec.Config) (Labeler, error) {
+	info := mig.NewDeviceInfo(manager)
+
+	// Enumerate the MIG devices on this node. In mig.strategy=mixed we ignore devices
+	// configured with migEnabled=true but exposing no MIG devices.
+	migDevices, err := info.GetAllMigDevices()
+	if err != nil {
+		return nil, fmt.Errorf("unable to retrieve list of MIG devices: %v", err)
+	}
+
+	// Group the MIG devices by name; the first device of a group acts as its reference.
+	byName := make(map[string]migResource)
+	for _, migDevice := range migDevices {
+		name, err := migDevice.GetName()
+		if err != nil {
+			return nil, fmt.Errorf("unable to get MIG device name: %v", err)
+		}
+
+		entry, seen := byName[name]
+		if !seen {
+			entry.device = migDevice
+			entry.name = spec.ResourceName("nvidia.com/mig-" + name)
+		}
+		entry.count++
+		byName[name] = entry
+	}
+
+	return newMIGDeviceLabelers(byName, config)
+}
+
+// newMIGDeviceLabelers creates one MIG resource labeler per entry in resources
+// and returns them as a single list labeler.
+func newMIGDeviceLabelers(resources map[string]migResource, config *spec.Config) (Labeler, error) {
+	var migLabelers list
+	for _, entry := range resources {
+		labeler, err := NewMIGResourceLabeler(entry.name, config, entry.device, entry.count)
+		if err != nil {
+			return nil, fmt.Errorf("failed to construct labeler: %v", err)
+		}
+		migLabelers = append(migLabelers, labeler)
+	}
+	return migLabelers, nil
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/mig-strategy_test.go b/pkg/nvidia-plugin/pkg/lm/mig-strategy_test.go
new file mode 100644
index 000000000..0897a0b0b
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/mig-strategy_test.go
@@ -0,0 +1,422 @@
+/**
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/resource"
+	rt "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/resource/testing"
+)
+
+// TestMigStrategyNoneLabels checks the labels NewResourceLabeler produces with mig-strategy=none, with and without time-slicing.
+func TestMigStrategyNoneLabels(t *testing.T) {
+	testCases := []struct {
+		description    string
+		devices        []resource.Device
+		timeSlicing    spec.ReplicatedResources
+		expectedError  bool
+		expectedLabels Labels
+	}{
+		{
+			description: "no devices returns empty labels",
+		},
+		{
+			description: "single non-mig device returns non-mig (none) labels",
+			devices: []resource.Device{
+				rt.NewFullGPU(),
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "1",
+				"nvidia.com/gpu.sharing-strategy": "none",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL",
+			},
+		},
+		{
+			description: "sharing is applied to single device",
+			devices: []resource.Device{
+				rt.NewFullGPU(),
+			},
+			timeSlicing: spec.ReplicatedResources{
+				Resources: []spec.ReplicatedResource{
+					{
+						Name:     "nvidia.com/gpu",
+						Replicas: 2,
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "2",
+				"nvidia.com/gpu.sharing-strategy": "time-slicing",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL-SHARED",
+			},
+		},
+		{
+			description: "sharing is applied to multiple devices",
+			devices: []resource.Device{
+				rt.NewFullGPU(),
+				rt.NewFullGPU(),
+			},
+			timeSlicing: spec.ReplicatedResources{
+				Resources: []spec.ReplicatedResource{
+					{
+						Name:     "nvidia.com/gpu",
+						Replicas: 2,
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.count":            "2",
+				"nvidia.com/gpu.replicas":         "2",
+				"nvidia.com/gpu.sharing-strategy": "time-slicing",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL-SHARED",
+			},
+		},
+		{
+			description: "sharing is not applied to single MIG device; replicas is zero",
+			devices: []resource.Device{
+				rt.NewMigEnabledDevice(),
+			},
+			timeSlicing: spec.ReplicatedResources{
+				Resources: []spec.ReplicatedResource{
+					{
+						Name:     "nvidia.com/gpu",
+						Replicas: 2,
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "0",
+				"nvidia.com/gpu.sharing-strategy": "none",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL",
+			},
+		},
+		{
+			description: "sharing is not applied to multiple MIG device; replicas is zero",
+			devices: []resource.Device{
+				rt.NewMigEnabledDevice(),
+				rt.NewMigEnabledDevice(),
+			},
+			timeSlicing: spec.ReplicatedResources{
+				Resources: []spec.ReplicatedResource{
+					{
+						Name:     "nvidia.com/gpu",
+						Replicas: 2,
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "2",
+				"nvidia.com/gpu.replicas":         "0",
+				"nvidia.com/gpu.sharing-strategy": "none",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL",
+			},
+		},
+		{
+			description: "sharing is applied to MIG device and non-MIG device",
+			devices: []resource.Device{
+				rt.NewMigEnabledDevice(),
+				rt.NewFullGPU(),
+			},
+			timeSlicing: spec.ReplicatedResources{
+				Resources: []spec.ReplicatedResource{
+					{
+						Name:     "nvidia.com/gpu",
+						Replicas: 2,
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.count":            "2",
+				"nvidia.com/gpu.replicas":         "2",
+				"nvidia.com/gpu.sharing-strategy": "time-slicing",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL-SHARED",
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			nvmlMock := rt.NewManagerMockWithDevices(tc.devices...)
+
+			config := spec.Config{
+				Flags: spec.Flags{
+					CommandLineFlags: spec.CommandLineFlags{
+						MigStrategy: ptr(MigStrategyNone),
+					},
+				},
+				Sharing: spec.Sharing{
+					TimeSlicing: tc.timeSlicing,
+				},
+			}
+
+			none, err := NewResourceLabeler(nvmlMock, &config)
+			require.NoError(t, err) // a failed construction would leave none nil and panic below
+			labels, err := none.Labels()
+			if tc.expectedError {
+				require.Error(t, err)
+			} else {
+				require.NoError(t, err)
+			}
+			require.EqualValues(t, tc.expectedLabels, labels)
+		})
+	}
+}
+
+// TestMigStrategySingleLabels checks the labels NewResourceLabeler produces with mig-strategy=single, including the invalid cases.
+func TestMigStrategySingleLabels(t *testing.T) {
+	testCases := []struct {
+		description    string
+		devices        []resource.Device
+		expectedError  bool
+		expectedLabels Labels
+		isInvalid      bool
+	}{
+		{
+			description: "no devices returns empty labels",
+		},
+		{
+			description: "single non-mig device returns non-mig (none) labels",
+			devices: []resource.Device{
+				rt.NewFullGPU(),
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "1",
+				"nvidia.com/gpu.sharing-strategy": "none",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL",
+				"nvidia.com/mig.strategy":         "single",
+			},
+		},
+		{
+			description: "multiple non-mig device returns non-mig (none) labels",
+			devices: []resource.Device{
+				rt.NewFullGPU(),
+				rt.NewFullGPU(),
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.count":            "2",
+				"nvidia.com/gpu.replicas":         "1",
+				"nvidia.com/gpu.sharing-strategy": "none",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL",
+				"nvidia.com/mig.strategy":         "single",
+			},
+		},
+		{
+			description: "single mig-enabled device returns mig labels",
+			devices: []resource.Device{
+				rt.NewMigEnabledDevice(
+					rt.NewMigDevice(1, 2, 100),
+				),
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "1",
+				"nvidia.com/gpu.sharing-strategy": "none",
+				"nvidia.com/gpu.memory":           "100",
+				"nvidia.com/gpu.product":          "MOCKMODEL-MIG-1g.100gb",
+				"nvidia.com/mig.strategy":         "single",
+				"nvidia.com/gpu.multiprocessors":  "0",
+				"nvidia.com/gpu.slices.gi":        "1",
+				"nvidia.com/gpu.slices.ci":        "2",
+				"nvidia.com/gpu.engines.copy":     "0",
+				"nvidia.com/gpu.engines.decoder":  "0",
+				"nvidia.com/gpu.engines.encoder":  "0",
+				"nvidia.com/gpu.engines.jpeg":     "0",
+				"nvidia.com/gpu.engines.ofa":      "0",
+			},
+		},
+		{
+			description: "multiple mig-enabled devices returns mig labels",
+			devices: []resource.Device{
+				rt.NewMigEnabledDevice(
+					rt.NewMigDevice(1, 2, 100, map[string]interface{}{
+						"multiprocessors": 12,
+						"engines.copy":    13,
+						"engines.decoder": 14,
+						"engines.encoder": 15,
+						"engines.jpeg":    16,
+						"engines.ofa":     17,
+					}),
+				),
+				rt.NewMigEnabledDevice(
+					rt.NewMigDevice(1, 2, 100, map[string]interface{}{
+						"multiprocessors": 12,
+						"engines.copy":    13,
+						"engines.decoder": 14,
+						"engines.encoder": 15,
+						"engines.jpeg":    16,
+						"engines.ofa":     17,
+					}),
+				),
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "2",
+				"nvidia.com/gpu.replicas":         "1",
+				"nvidia.com/gpu.sharing-strategy": "none",
+				"nvidia.com/gpu.memory":           "100",
+				"nvidia.com/gpu.product":          "MOCKMODEL-MIG-1g.100gb",
+				"nvidia.com/mig.strategy":         "single",
+				"nvidia.com/gpu.multiprocessors":  "12",
+				"nvidia.com/gpu.slices.gi":        "1",
+				"nvidia.com/gpu.slices.ci":        "2",
+				"nvidia.com/gpu.engines.copy":     "13",
+				"nvidia.com/gpu.engines.decoder":  "14",
+				"nvidia.com/gpu.engines.encoder":  "15",
+				"nvidia.com/gpu.engines.jpeg":     "16",
+				"nvidia.com/gpu.engines.ofa":      "17",
+			},
+		},
+		{
+			description: "empty mig devices returns MIG invalid label",
+			devices: []resource.Device{
+				rt.NewMigEnabledDevice(),
+			},
+			isInvalid: true,
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "0",
+				"nvidia.com/gpu.replicas":         "0",
+				"nvidia.com/gpu.sharing-strategy": "",
+				"nvidia.com/gpu.memory":           "0",
+				"nvidia.com/gpu.product":          "MOCKMODEL-MIG-INVALID",
+				"nvidia.com/mig.strategy":         "single",
+			},
+		},
+		{
+			description: "mixed mig config returns MIG invalid label",
+			devices: []resource.Device{
+				rt.NewMigEnabledDevice(
+					rt.NewMigDevice(1, 2, 100),
+					rt.NewMigDevice(3, 4, 100),
+				),
+			},
+			isInvalid: true,
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "0",
+				"nvidia.com/gpu.replicas":         "0",
+				"nvidia.com/gpu.sharing-strategy": "",
+				"nvidia.com/gpu.memory":           "0",
+				"nvidia.com/gpu.product":          "MOCKMODEL-MIG-INVALID",
+				"nvidia.com/mig.strategy":         "single",
+			},
+		},
+		{
+			description: "mixed mig enabled and disabled returns invalid config",
+			devices: []resource.Device{
+				rt.NewMigEnabledDevice(
+					rt.NewMigDevice(1, 2, 100),
+				),
+				rt.NewFullGPU(),
+			},
+			isInvalid: true,
+			expectedLabels: Labels{
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.count":            "0",
+				"nvidia.com/gpu.replicas":         "0",
+				"nvidia.com/gpu.sharing-strategy": "",
+				"nvidia.com/gpu.memory":           "0",
+				"nvidia.com/gpu.product":          "MOCKMODEL-MIG-INVALID",
+				"nvidia.com/mig.strategy":         "single",
+			},
+		},
+		{
+			description: "enabled, disabled, and empty returns invalid config",
+			devices: []resource.Device{
+				rt.NewMigEnabledDevice(
+					rt.NewMigDevice(1, 2, 100),
+				),
+				rt.NewFullGPU(),
+				rt.NewMigEnabledDevice(),
+			},
+			isInvalid: true,
+			expectedLabels: Labels{
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.count":            "0",
+				"nvidia.com/gpu.replicas":         "0",
+				"nvidia.com/gpu.sharing-strategy": "",
+				"nvidia.com/gpu.memory":           "0",
+				"nvidia.com/gpu.product":          "MOCKMODEL-MIG-INVALID",
+				"nvidia.com/mig.strategy":         "single",
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			nvmlMock := rt.NewManagerMockWithDevices(tc.devices...)
+
+			config := spec.Config{
+				Flags: spec.Flags{
+					CommandLineFlags: spec.CommandLineFlags{
+						MigStrategy: ptr(MigStrategySingle),
+					},
+				},
+			}
+
+			single, err := NewResourceLabeler(nvmlMock, &config)
+			require.NoError(t, err) // a failed construction would leave single nil and panic below
+			labels, err := single.Labels()
+			if tc.expectedError {
+				require.Error(t, err)
+			} else {
+				require.NoError(t, err)
+			}
+			require.EqualValues(t, tc.expectedLabels, labels)
+		})
+	}
+}
+
+// ptr returns a pointer to a copy of whatever value is passed into it
+func ptr[T any](x T) *T {
+	return &x
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/nvml.go b/pkg/nvidia-plugin/pkg/lm/nvml.go
new file mode 100644
index 000000000..77bf7a817
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/nvml.go
@@ -0,0 +1,262 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+import (
+	"errors"
+	"fmt"
+	"strconv"
+	"strings"
+
+	"k8s.io/klog/v2"
+
+	"github.com/NVIDIA/go-nvlib/pkg/nvpci"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/resource"
+)
+
+var errMPSSharingNotSupported = errors.New("MPS sharing is not supported") // wrapped by isMPSCapable when a device has MIG enabled
+
+// NewDeviceLabeler creates a new labeler for the specified resource manager.
+// The manager is initialised for the duration of the call and shut down on return. When it reports
+// no devices an empty labeler is returned; otherwise all device labelers are merged into one.
+func NewDeviceLabeler(manager resource.Manager, config *spec.Config) (Labeler, error) {
+	if err := manager.Init(); err != nil {
+		return nil, fmt.Errorf("failed to initialize resource manager: %v", err)
+	}
+	defer func() {
+		_ = manager.Shutdown() // shutdown errors are deliberately ignored
+	}()
+
+	devices, err := manager.GetDevices()
+	if err != nil {
+		return nil, fmt.Errorf("error getting devices: %v", err)
+	}
+	if len(devices) == 0 {
+		return empty{}, nil
+	}
+
+	machineTypeLabeler, err := newMachineTypeLabeler(*config.Flags.GFD.MachineTypeFile)
+	if err != nil {
+		return nil, fmt.Errorf("failed to construct machine type labeler: %v", err)
+	}
+
+	versionLabeler, err := newVersionLabeler(manager)
+	if err != nil {
+		return nil, fmt.Errorf("failed to construct version labeler: %v", err)
+	}
+
+	migCapabilityLabeler, err := newMigCapabilityLabeler(manager)
+	if err != nil {
+		return nil, fmt.Errorf("error creating mig capability labeler: %v", err)
+	}
+
+	sharingLabeler, err := newSharingLabeler(manager, config)
+	if err != nil {
+		return nil, fmt.Errorf("error creating sharing labeler: %w", err)
+	}
+
+	resourceLabeler, err := NewResourceLabeler(manager, config)
+	if err != nil {
+		return nil, fmt.Errorf("error creating resource labeler: %v", err)
+	}
+
+	gpuModeLabeler, err := newGPUModeLabeler(devices)
+	if err != nil {
+		return nil, fmt.Errorf("error creating GPU mode labeler: %v", err)
+	}
+
+	imexLabeler, err := newImexLabeler(config, devices)
+	if err != nil {
+		return nil, fmt.Errorf("error creating IMEX labeler: %v", err)
+	}
+
+	l := Merge(
+		machineTypeLabeler,
+		versionLabeler,
+		migCapabilityLabeler,
+		sharingLabeler,
+		resourceLabeler,
+		gpuModeLabeler,
+		imexLabeler,
+	)
+	return l, nil
+}
+
+// newVersionLabeler creates a labeler that generates the CUDA and driver version labels.
+// The driver version must have the form "X.Y[.Z]"; without a Z component the revision labels are empty.
+func newVersionLabeler(manager resource.Manager) (Labeler, error) {
+	driverVersion, err := manager.GetDriverVersion()
+	if err != nil {
+		return nil, fmt.Errorf("error getting driver version: %v", err)
+	}
+
+	parts := strings.Split(driverVersion, ".")
+	if n := len(parts); n < 2 || n > 3 {
+		return nil, fmt.Errorf("error getting driver version: Version \"%s\" does not match format \"X.Y[.Z]\"", driverVersion)
+	}
+	major, minor, revision := parts[0], parts[1], ""
+	if len(parts) == 3 {
+		revision = parts[2]
+	}
+
+	cudaMajor, cudaMinor, err := manager.GetCudaDriverVersion()
+	if err != nil {
+		return nil, fmt.Errorf("error getting cuda driver version: %v", err)
+	}
+	runtimeMajor := fmt.Sprintf("%d", cudaMajor)
+	runtimeMinor := fmt.Sprintf("%d", cudaMinor)
+
+	versionLabels := Labels{
+		// Deprecated labels
+		"nvidia.com/cuda.driver.major":  major,
+		"nvidia.com/cuda.driver.minor":  minor,
+		"nvidia.com/cuda.driver.rev":    revision,
+		"nvidia.com/cuda.runtime.major": runtimeMajor,
+		"nvidia.com/cuda.runtime.minor": runtimeMinor,
+
+		// New labels
+		"nvidia.com/cuda.driver-version.major":    major,
+		"nvidia.com/cuda.driver-version.minor":    minor,
+		"nvidia.com/cuda.driver-version.revision": revision,
+		"nvidia.com/cuda.driver-version.full":     driverVersion,
+		"nvidia.com/cuda.runtime-version.major":   runtimeMajor,
+		"nvidia.com/cuda.runtime-version.minor":   runtimeMinor,
+		"nvidia.com/cuda.runtime-version.full":    fmt.Sprintf("%d.%d", cudaMajor, cudaMinor),
+	}
+	return versionLabels, nil
+}
+
+// newMigCapabilityLabeler creates the labeler for the nvidia.com/mig.capable label.
+// The label is "true" as soon as one device reported by the manager is MIG capable;
+// when the manager reports no devices an empty labeler is returned instead.
+func newMigCapabilityLabeler(manager resource.Manager) (Labeler, error) {
+	gpus, err := manager.GetDevices()
+	if err != nil {
+		return nil, err
+	}
+	if len(gpus) == 0 {
+		return empty{}, nil
+	}
+
+	anyCapable := false
+	for _, gpu := range gpus {
+		capable, err := gpu.IsMigCapable()
+		if err != nil {
+			return nil, fmt.Errorf("error getting mig capability: %v", err)
+		}
+		if capable {
+			// One capable device is enough; the remaining ones are not queried.
+			anyCapable = true
+			break
+		}
+	}
+
+	capableLabel := Labels{
+		"nvidia.com/mig.capable": strconv.FormatBool(anyCapable),
+	}
+	return capableLabel, nil
+}
+
+// newSharingLabeler creates the labeler for the nvidia.com/mps.capable label.
+// Unless the configured sharing strategy is MPS the label is always "false";
+// otherwise it reflects the result of isMPSCapable.
+func newSharingLabeler(manager resource.Manager, config *spec.Config) (Labeler, error) {
+	const mpsCapableKey = "nvidia.com/mps.capable"
+
+	usesMPS := config != nil && config.Sharing.SharingStrategy() == spec.SharingStrategyMPS
+	if !usesMPS {
+		return Labels{mpsCapableKey: "false"}, nil
+	}
+
+	mpsCapable, err := isMPSCapable(manager)
+	if err != nil {
+		return nil, fmt.Errorf("failed to check MPS-capable: %w", err)
+	}
+
+	return Labels{mpsCapableKey: strconv.FormatBool(mpsCapable)}, nil
+}
+
+// isMPSCapable returns true unless a device has MIG enabled, which is reported as errMPSSharingNotSupported.
+func isMPSCapable(manager resource.Manager) (bool, error) {
+	gpus, err := manager.GetDevices()
+	if err != nil {
+		return false, fmt.Errorf("failed to get device: %w", err)
+	}
+	for _, gpu := range gpus {
+		migEnabled, err := gpu.IsMigEnabled()
+		switch {
+		case err != nil:
+			return false, fmt.Errorf("failed to check if device is MIG-enabled: %w", err)
+		case migEnabled:
+			return false, fmt.Errorf("%w for mig devices", errMPSSharingNotSupported)
+		}
+	}
+	return true, nil
+}
+
+// newGPUModeLabeler creates the labeler for the nvidia.com/gpu.mode label.
+// The mode is derived from the PCI classes of the given devices and is one of
+// "graphics", "compute" or "unknown".
+// An error from reading a device's PCI class is returned unchanged.
+func newGPUModeLabeler(devices []resource.Device) (Labeler, error) {
+	pciClasses, err := getDeviceClasses(devices)
+	if err != nil {
+		return nil, err
+	}
+
+	modeLabel := Labels{"nvidia.com/gpu.mode": getModeForClasses(pciClasses)}
+	return modeLabel, nil
+}
+
+// getModeForClasses maps PCI device classes to a GPU mode: "graphics" when every class is the VGA
+// controller class, "compute" for the 3D controller class, and "unknown" for anything else.
+func getModeForClasses(classes []uint32) string {
+	for _, class := range classes {
+		if class != classes[0] {
+			klog.Infof("Not all GPU devices belong to the same class %#06x ", classes)
+			return "unknown"
+		}
+	}
+	switch {
+	case len(classes) == 0:
+		return "unknown"
+	case classes[0] == nvpci.PCIVgaControllerClass:
+		return "graphics"
+	case classes[0] == nvpci.PCI3dControllerClass:
+		return "compute"
+	}
+	return "unknown"
+}
+
+// getDeviceClasses returns the distinct PCI classes of devices in first-seen order, so the result is deterministic.
+func getDeviceClasses(devices []resource.Device) ([]uint32, error) {
+	seen := make(map[uint32]bool)
+	var classes []uint32
+	for _, d := range devices {
+		class, err := d.GetPCIClass()
+		if err != nil {
+			return nil, err
+		}
+		if !seen[class] {
+			seen[class] = true
+			classes = append(classes, class)
+		}
+	}
+	return classes, nil
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/nvml_test.go b/pkg/nvidia-plugin/pkg/lm/nvml_test.go
new file mode 100644
index 000000000..fb6fa4793
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/nvml_test.go
@@ -0,0 +1,292 @@
+package lm
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/resource"
+	rt "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/resource/testing"
+)
+
+// TestMigCapabilityLabeler checks the mig.capable label for device sets with and without MIG-enabled devices.
+func TestMigCapabilityLabeler(t *testing.T) {
+	testCases := []struct {
+		description    string
+		devices        []resource.Device
+		expectedError  bool
+		expectedLabels map[string]string
+	}{
+		{
+			description: "no devices returns empty labels",
+		},
+		{
+			description: "single non-mig capable device returns mig.capable as false",
+			devices: []resource.Device{
+				rt.NewFullGPU(),
+			},
+			expectedLabels: map[string]string{
+				"nvidia.com/mig.capable": "false",
+			},
+		},
+		{
+			description: "multiple non-mig capable devices returns mig.capable as false",
+			devices: []resource.Device{
+				rt.NewFullGPU(),
+				rt.NewFullGPU(),
+			},
+			expectedLabels: map[string]string{
+				"nvidia.com/mig.capable": "false",
+			},
+		},
+		{
+			description: "single mig capable device returns mig.capable as true",
+			devices: []resource.Device{
+				rt.NewMigEnabledDevice(),
+			},
+			expectedLabels: map[string]string{
+				"nvidia.com/mig.capable": "true",
+			},
+		},
+		{
+			description: "one mig capable device among multiple returns mig.capable as true",
+			devices: []resource.Device{
+				rt.NewFullGPU(),
+				rt.NewMigEnabledDevice(),
+			},
+			expectedLabels: map[string]string{
+				"nvidia.com/mig.capable": "true",
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			nvmlMock := rt.NewManagerMockWithDevices(tc.devices...)
+			migCapabilityLabeler, err := newMigCapabilityLabeler(nvmlMock)
+			require.NoError(t, err) // fail here instead of dereferencing a nil labeler below
+			labels, err := migCapabilityLabeler.Labels()
+			if tc.expectedError {
+				require.Error(t, err)
+			} else {
+				require.NoError(t, err)
+			}
+
+			require.EqualValues(t, tc.expectedLabels, labels)
+		})
+	}
+}
+
+// TestSharingLabeler checks the mps.capable label, or the error, returned by newSharingLabeler for various configs.
+func TestSharingLabeler(t *testing.T) {
+	testCases := []struct {
+		description    string
+		manager        resource.Manager
+		config         *spec.Config
+		expectedLabels map[string]string
+		expectedError  error
+	}{
+		{
+			description: "nil config",
+			expectedLabels: map[string]string{
+				"nvidia.com/mps.capable": "false",
+			},
+		},
+		{
+			description: "empty config",
+			config:      &spec.Config{},
+			expectedLabels: map[string]string{
+				"nvidia.com/mps.capable": "false",
+			},
+		},
+		{
+			description: "config with timeslicing replicas",
+			config: &spec.Config{
+				Sharing: spec.Sharing{
+					TimeSlicing: spec.ReplicatedResources{
+						Resources: []spec.ReplicatedResource{
+							{
+								Replicas: 2,
+							},
+						},
+					},
+				},
+			},
+			expectedLabels: map[string]string{
+				"nvidia.com/mps.capable": "false",
+			},
+		},
+		{
+			description: "config with no mps replicas",
+			config: &spec.Config{
+				Sharing: spec.Sharing{
+					MPS: &spec.ReplicatedResources{
+						Resources: []spec.ReplicatedResource{
+							{
+								Replicas: 1,
+							},
+						},
+					},
+				},
+			},
+			expectedLabels: map[string]string{
+				"nvidia.com/mps.capable": "false",
+			},
+		},
+		{
+			description: "config with mps replicas no-mig-devices",
+			manager: &resource.ManagerMock{
+				GetDevicesFunc: func() ([]resource.Device, error) {
+					devices := []resource.Device{
+						&resource.DeviceMock{
+							IsMigEnabledFunc: func() (bool, error) {
+								return false, nil
+							},
+						},
+					}
+					return devices, nil
+				},
+			},
+			config: &spec.Config{
+				Sharing: spec.Sharing{
+					MPS: &spec.ReplicatedResources{
+						Resources: []spec.ReplicatedResource{
+							{
+								Replicas: 2,
+							},
+						},
+					},
+				},
+			},
+			expectedLabels: map[string]string{
+				"nvidia.com/mps.capable": "true",
+			},
+		},
+		{
+			description: "config with mps replicas mig-devices",
+			manager: &resource.ManagerMock{
+				GetDevicesFunc: func() ([]resource.Device, error) {
+					devices := []resource.Device{
+						&resource.DeviceMock{
+							IsMigEnabledFunc: func() (bool, error) {
+								return true, nil
+							},
+						},
+					}
+					return devices, nil
+				},
+			},
+			config: &spec.Config{
+				Sharing: spec.Sharing{
+					MPS: &spec.ReplicatedResources{
+						Resources: []spec.ReplicatedResource{
+							{
+								Replicas: 2,
+							},
+						},
+					},
+				},
+			},
+			expectedError:  errMPSSharingNotSupported,
+			expectedLabels: nil,
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			labels, err := newSharingLabeler(tc.manager, tc.config)
+			require.ErrorIs(t, err, tc.expectedError)
+			if tc.expectedError != nil {
+				require.Nil(t, labels)
+			} else {
+				require.EqualValues(t, tc.expectedLabels, labels)
+			}
+		})
+	}
+}
+
+// TestGPUModeLabeler checks the gpu.mode label derived from device PCI classes (graphics, compute or unknown).
+func TestGPUModeLabeler(t *testing.T) {
+	testCases := []struct {
+		description    string
+		devices        []resource.Device
+		expectedError  bool
+		expectedLabels map[string]string
+	}{
+		{
+			description: "single device with graphics PCI class",
+			devices: []resource.Device{
+				rt.NewDeviceWithPCIClassMock(0x030000),
+			},
+			expectedLabels: map[string]string{
+				"nvidia.com/gpu.mode": "graphics",
+			},
+		},
+		{
+			description: "single device with compute PCI class",
+			devices: []resource.Device{
+				rt.NewDeviceWithPCIClassMock(0x030200),
+			},
+			expectedLabels: map[string]string{
+				"nvidia.com/gpu.mode": "compute",
+			},
+		},
+		{
+			description: "single device with switch PCI class",
+			devices: []resource.Device{
+				rt.NewDeviceWithPCIClassMock(0x068000),
+			},
+			expectedLabels: map[string]string{
+				"nvidia.com/gpu.mode": "unknown",
+			},
+		},
+		{
+			description: "multiple device have same compute PCI class",
+			devices: []resource.Device{
+				rt.NewDeviceWithPCIClassMock(0x030200),
+				rt.NewDeviceWithPCIClassMock(0x030200),
+				rt.NewDeviceWithPCIClassMock(0x030200),
+			},
+			expectedLabels: map[string]string{
+				"nvidia.com/gpu.mode": "compute",
+			},
+		},
+		{
+			description: "multiple device have same graphics PCI class",
+			devices: []resource.Device{
+				rt.NewDeviceWithPCIClassMock(0x030000),
+				rt.NewDeviceWithPCIClassMock(0x030000),
+				rt.NewDeviceWithPCIClassMock(0x030000),
+			},
+			expectedLabels: map[string]string{
+				"nvidia.com/gpu.mode": "graphics",
+			},
+		},
+		{
+			description: "multiple device with some with graphics and others with compute PCI class",
+			devices: []resource.Device{
+				rt.NewDeviceWithPCIClassMock(0x030000),
+				rt.NewDeviceWithPCIClassMock(0x030200),
+				rt.NewDeviceWithPCIClassMock(0x030000),
+			},
+			expectedLabels: map[string]string{
+				"nvidia.com/gpu.mode": "unknown",
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			gpuModeLabeler, err := newGPUModeLabeler(tc.devices)
+			require.NoError(t, err) // fail here instead of dereferencing a nil labeler below
+			labels, err := gpuModeLabeler.Labels()
+			if tc.expectedError {
+				require.Error(t, err)
+			} else {
+				require.NoError(t, err)
+			}
+
+			require.EqualValues(t, tc.expectedLabels, labels)
+		})
+	}
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/output.go b/pkg/nvidia-plugin/pkg/lm/output.go
new file mode 100644
index 000000000..16a1af3ff
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/output.go
@@ -0,0 +1,155 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+
+	apiequality "k8s.io/apimachinery/pkg/api/equality"
+	"k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/klog/v2"
+	nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/pkg/apis/nfd/v1alpha1"
+	nfdclientset "sigs.k8s.io/node-feature-discovery/pkg/generated/clientset/versioned"
+
+	"github.com/google/renameio"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/flags"
+)
+
+// Outputer is implemented by every sink that labels can be written to.
+type Outputer interface {
+	Output(Labels) error // Output writes the given labels to the sink.
+}
+
+// NewOutputer selects the label sink: the GFD output file (via ToFile) unless the
+// NodeFeature API flag is set and true, in which case a NodeFeature writer is
+// returned and both the node name and namespace are required.
+// NOTE(review): config.Flags.GFD.OutputFile is dereferenced unconditionally — confirm it is always set.
+// TODO: Replace this with functional options.
+func NewOutputer(config *spec.Config, nodeConfig flags.NodeConfig, clientSets flags.ClientSets) (Outputer, error) {
+	useNodeFeatureAPI := config.Flags.UseNodeFeatureAPI
+	if useNodeFeatureAPI == nil || !*useNodeFeatureAPI {
+		return ToFile(*config.Flags.GFD.OutputFile), nil
+	}
+	switch {
+	case nodeConfig.Name == "":
+		return nil, fmt.Errorf("required flag node-name not set")
+	case nodeConfig.Namespace == "":
+		return nil, fmt.Errorf("required flag namespace not set")
+	}
+	return &nodeFeatureObject{nodeConfig: nodeConfig, nfdClientset: clientSets.NFD}, nil
+}
+
+// ToFile returns an Outputer writing to the file at path, or to stdout when path is empty.
+func ToFile(path string) Outputer {
+	if path != "" {
+		target := toFile(path)
+		return &target
+	}
+	return &toWriter{os.Stdout}
+}
+
+// toFile is an Outputer that writes labels to the file path it holds.
+type toFile string
+
+// toWriter is an Outputer that writes labels to the embedded io.Writer.
+type toWriter struct {
+	io.Writer
+}
+
+// Output renders the labels into a buffer and atomically replaces the target file; errors are wrapped with %w.
+func (path *toFile) Output(labels Labels) error {
+	klog.Infof("Writing labels to output file %v", *path)
+	buffer := new(bytes.Buffer)
+	output := &toWriter{buffer}
+	if err := output.Output(labels); err != nil {
+		return fmt.Errorf("error writing labels to buffer: %w", err)
+	}
+	// write file atomically
+	if err := renameio.WriteFile(string(*path), buffer.Bytes(), 0644); err != nil {
+		return fmt.Errorf("error atomically writing file '%s': %w", *path, err)
+	}
+	return nil
+}
+
+// Output writes one "key=value" line per label in map iteration order and stops at the first write error.
+func (output *toWriter) Output(labels Labels) error {
+	for name, value := range labels {
+		if _, err := fmt.Fprintf(output, "%s=%s\n", name, value); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+const nodeFeatureVendorPrefix = "nvidia-features-for" // name prefix of the per-node NodeFeature object
+// nodeFeatureObject is an Outputer that publishes labels through a NodeFeature custom resource.
+type nodeFeatureObject struct {
+	nodeConfig   flags.NodeConfig       // node name and namespace used to address the object
+	nfdClientset nfdclientset.Interface // client used to get, create and update the object
+}
+
+// Output creates or updates the node-specific NodeFeature custom resource so that it carries the given labels.
+func (n *nodeFeatureObject) Output(labels Labels) error {
+	nodename := n.nodeConfig.Name
+	if nodename == "" {
+		return fmt.Errorf("required flag %q not set", "node-name")
+	}
+	namespace := n.nodeConfig.Namespace
+	nodeFeatureName := strings.Join([]string{nodeFeatureVendorPrefix, nodename}, "-")
+	// Look up the object first; a NotFound error from Get means it has to be created.
+	if nfr, err := n.nfdClientset.NfdV1alpha1().NodeFeatures(namespace).Get(context.TODO(), nodeFeatureName, metav1.GetOptions{}); errors.IsNotFound(err) {
+		klog.Infof("creating NodeFeature object %s", nodeFeatureName)
+		nfr = &nfdv1alpha1.NodeFeature{
+			TypeMeta:   metav1.TypeMeta{},
+			ObjectMeta: metav1.ObjectMeta{Name: nodeFeatureName, Labels: map[string]string{nfdv1alpha1.NodeFeatureObjNodeNameLabel: nodename}},
+			Spec:       nfdv1alpha1.NodeFeatureSpec{Features: *nfdv1alpha1.NewFeatures(), Labels: labels},
+		}
+
+		nfrCreated, err := n.nfdClientset.NfdV1alpha1().NodeFeatures(namespace).Create(context.TODO(), nfr, metav1.CreateOptions{})
+		if err != nil {
+			return fmt.Errorf("failed to create NodeFeature object %q: %w", nfr.Name, err)
+		}
+
+		klog.Infof("NodeFeature object created: %v", nfrCreated)
+	} else if err != nil {
+		return fmt.Errorf("failed to get NodeFeature object: %w", err)
+	} else {
+		nfrUpdated := nfr.DeepCopy()
+		nfrUpdated.Labels = map[string]string{nfdv1alpha1.NodeFeatureObjNodeNameLabel: nodename}
+		nfrUpdated.Spec = nfdv1alpha1.NodeFeatureSpec{Features: *nfdv1alpha1.NewFeatures(), Labels: labels}
+		// Only call Update when the desired object differs semantically from the stored one.
+		if !apiequality.Semantic.DeepEqual(nfr, nfrUpdated) {
+			klog.Infof("updating NodeFeature object %s", nodeFeatureName)
+			nfrUpdated, err = n.nfdClientset.NfdV1alpha1().NodeFeatures(namespace).Update(context.TODO(), nfrUpdated, metav1.UpdateOptions{})
+			if err != nil {
+				return fmt.Errorf("failed to update NodeFeature object %q: %w", nfr.Name, err)
+			}
+			klog.Infof("NodeFeature object updated: %v", nfrUpdated)
+		} else {
+			klog.Infof("no changes in NodeFeature object, not updating")
+		}
+	}
+	return nil
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/resource.go b/pkg/nvidia-plugin/pkg/lm/resource.go
new file mode 100644
index 000000000..799adea4c
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/resource.go
@@ -0,0 +1,319 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/resource"
+)
+
+const fullGPUResourceName = "nvidia.com/gpu" // resource name NewGPUResourceLabeler uses as the label key prefix
+
+// NewGPUResourceLabelerWithoutSharing creates a resource labeler for the specified device with sharing disabled.
+func NewGPUResourceLabelerWithoutSharing(device resource.Device, count int) (Labeler, error) {
+	// NOTE: We use a nil config to signal that sharing is disabled (0 replicas, spec.SharingStrategyNone).
+	return NewGPUResourceLabeler(nil, device, count)
+}
+
+// NewGPUResourceLabeler creates a resource labeler for the specified full GPU device with the specified count.
+// A zero count yields an empty labeler. Otherwise the result merges the base labels
+// (product, count, replicas, sharing strategy), the memory label (omitted when the
+// device reports 0 MB) and the architecture labels. Device errors are wrapped with %w.
+func NewGPUResourceLabeler(config *spec.Config, device resource.Device, count int) (Labeler, error) {
+	if count == 0 {
+		return empty{}, nil
+	}
+	model, err := device.GetName()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get device model: %w", err)
+	}
+	totalMemoryMB, err := device.GetTotalMemoryMB()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get memory info for device: %w", err)
+	}
+
+	resourceLabeler := newResourceLabeler(fullGPUResourceName, config)
+	architectureLabels, err := newArchitectureLabels(resourceLabeler, device)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create architecture labels: %w", err)
+	}
+
+	// A device that reports no memory gets no memory label at all.
+	memoryLabeler := (Labeler)(&empty{})
+	if totalMemoryMB != 0 {
+		memoryLabeler = resourceLabeler.single("memory", totalMemoryMB)
+	}
+
+	labelers := Merge(
+		resourceLabeler.baseLabeler(count, model),
+		memoryLabeler,
+		architectureLabels,
+	)
+	return labelers, nil
+}
+
+// NewMIGResourceLabeler creates a resource labeler for the specified MIG device with the specified resource name.
+// A zero count yields an empty labeler. The product label combines the parent device's name,
+// "MIG" and the MIG device's own name (its profile); the MIG attributes are added as further labels.
+func NewMIGResourceLabeler(resourceName spec.ResourceName, config *spec.Config, device resource.Device, count int) (Labeler, error) {
+	if count == 0 {
+		return empty{}, nil
+	}
+
+	parent, err := device.GetDeviceHandleFromMigDeviceHandle()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get parent of MIG device: %w", err)
+	}
+	model, err := parent.GetName()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get device model: %w", err)
+	}
+	migProfile, err := device.GetName()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get MIG profile name: %w", err)
+	}
+
+	resourceLabeler := newResourceLabeler(resourceName, config)
+	attributeLabels, err := newMigAttributeLabels(resourceLabeler, device)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get MIG attribute labels: %w", err)
+	}
+
+	labelers := Merge(
+		resourceLabeler.baseLabeler(count, model, "MIG", migProfile),
+		attributeLabels,
+	)
+
+	return labelers, nil
+}
+
+// newResourceLabeler builds the labeler for resourceName. With a nil config the
+// sharing settings stay nil, which the labeler treats as sharing being disabled;
+// otherwise they point at the config's Sharing section.
+func newResourceLabeler(resourceName spec.ResourceName, config *spec.Config) resourceLabeler {
+	labeler := resourceLabeler{resourceName: resourceName}
+	if config == nil {
+		return labeler
+	}
+	labeler.sharing = &config.Sharing
+	return labeler
+}
+
+type resourceLabeler struct {
+	resourceName spec.ResourceName // fully-qualified resource name used as the label key prefix
+	sharing      *spec.Sharing     // sharing settings; nil means sharing is disabled
+}
+
+// single builds a one-entry label set keyed <fully-qualified-resource-name>.suffix.
+func (rl resourceLabeler) single(suffix string, value interface{}) Labels {
+	one := make(Labels)
+	rl.updateLabel(one, suffix, value)
+	return one
+}
+
+// labels converts a suffix-to-value map into labels for the resource. Every
+// entry becomes <fully-qualified-resource-name>.suffix and its value is
+// rendered with the %v verb.
+func (rl resourceLabeler) labels(suffixValues map[string]interface{}) Labels {
+	out := make(Labels)
+	for suffix := range suffixValues {
+		out[rl.key(suffix)] = fmt.Sprintf("%v", suffixValues[suffix])
+	}
+	return out
+}
+
+// updateLabel sets <fully-qualified-resource-name>.suffix in the given labels
+// to the value rendered with the %v verb. An existing entry for that key is
+// overwritten.
+func (rl resourceLabeler) updateLabel(labels Labels, suffix string, value interface{}) {
+	rendered := fmt.Sprintf("%v", value)
+	labels[rl.key(suffix)] = rendered
+}
+
+// key returns the label key for suffix: <fully-qualified-resource-name>.suffix
+func (rl resourceLabeler) key(suffix string) string {
+	prefix := string(rl.resourceName)
+	return prefix + "." + suffix
+}
+
+// baseLabeler generates the product, count, replicas and sharing-strategy
+// labels for the resource. The strategy is only taken from the sharing
+// settings when they are present and more than one replica is configured;
+// otherwise spec.SharingStrategyNone is reported. The product name is built
+// from parts by getProductName.
+func (rl resourceLabeler) baseLabeler(count int, parts ...string) Labeler {
+	replicaCount := rl.getReplicas()
+	sharingStrategy := spec.SharingStrategyNone
+	if rl.sharing != nil && replicaCount > 1 {
+		sharingStrategy = rl.sharing.SharingStrategy()
+	}
+
+	// rl.labels keys each entry as <resource-name>.<suffix> and renders the value with %v.
+	return rl.labels(map[string]interface{}{
+		"product":          rl.getProductName(parts...),
+		"count":            count,
+		"replicas":         replicaCount,
+		"sharing-strategy": sharingStrategy,
+	})
+}
+
+// productLabel returns the "product" label built from parts, or an empty label set when the name is empty.
+// Deprecated
+func (rl resourceLabeler) productLabel(parts ...string) Labels {
+	if product := rl.getProductName(parts...); product != "" {
+		return rl.single("product", product)
+	}
+	return make(Labels)
+}
+
+// getProductName joins the sanitised, non-empty parts with "-" and appends "SHARED" when the
+// resource is shared but not renamed. Without any non-empty part it returns "".
+func (rl resourceLabeler) getProductName(parts ...string) string {
+	var cleaned []string
+	for _, part := range parts {
+		if part == "" {
+			continue
+		}
+		cleaned = append(cleaned, sanitise(part))
+	}
+	if len(cleaned) == 0 {
+		return ""
+	}
+	if rl.isShared() && !rl.isRenamed() {
+		cleaned = append(cleaned, "SHARED")
+	}
+	return strings.Join(cleaned, "-")
+}
+
+func (rl resourceLabeler) getReplicas() int {
+	if rl.sharingDisabled() {
+		return 0 // sharing disabled (nil sharing settings): report no replicas
+	} else if r := rl.replicationInfo(); r != nil && r.Replicas > 0 {
+		return r.Replicas // positive replica count configured for this resource
+	}
+	return 1 // sharing settings present, but no positive replica count for this resource
+}
+
+// sharingDisabled reports whether sharing is disabled for this labeler, i.e. its sharing settings are nil.
+// TODO: The nil check here is because we call NewGPUResourceLabeler with a nil config when sharing is disabled.
+func (rl resourceLabeler) sharingDisabled() bool {
+	return rl.sharing == nil
+}
+
+// isShared reports whether the resource is shared, i.e. whether replication
+// settings exist for it and request more than one replica. It is false when
+// no settings are found.
+func (rl resourceLabeler) isShared() bool {
+	info := rl.replicationInfo()
+	return info != nil && info.Replicas > 1
+}
+
+// isRenamed reports whether the replication settings found for the resource
+// carry a non-empty Rename value. It is false when no settings are found for
+// the resource.
+func (rl resourceLabeler) isRenamed() bool {
+	info := rl.replicationInfo()
+	return info != nil && info.Rename != ""
+}
+
+// replicationInfo returns a copy of the replicated-resource entry named like this resource, or nil if none applies.
+func (rl resourceLabeler) replicationInfo() *spec.ReplicatedResource {
+	if !rl.sharingDisabled() {
+		entries := rl.sharing.ReplicatedResources().Resources
+		for i := range entries {
+			if entry := entries[i]; entry.Name == rl.resourceName {
+				return &entry
+			}
+		}
+	}
+	return nil
+}
+
+// newMigAttributeLabels turns the attributes reported by the device into
+// labels for the resource, one label per attribute. A failure to read the
+// attributes is wrapped with %w so the cause stays inspectable.
+func newMigAttributeLabels(rl resourceLabeler, device resource.Device) (Labels, error) {
+	migAttributes, err := device.GetAttributes()
+	if err != nil {
+		return nil, fmt.Errorf("unable to get attributes of MIG device: %w", err)
+	}
+	return rl.labels(migAttributes), nil
+}
+
+// newArchitectureLabels creates the family, compute.major and compute.minor
+// labels for the device. A device reporting compute major version 0 yields an
+// empty label set. A failure to read the capability is wrapped with %w.
+func newArchitectureLabels(rl resourceLabeler, device resource.Device) (Labels, error) {
+	computeMajor, computeMinor, err := device.GetCudaComputeCapability()
+	if err != nil {
+		return nil, fmt.Errorf("failed to determine CUDA compute capability: %w", err)
+	}
+
+	if computeMajor == 0 {
+		return make(Labels), nil
+	}
+
+	archLabels := rl.labels(map[string]interface{}{
+		"family":        getArchFamily(computeMajor, computeMinor),
+		"compute.major": computeMajor,
+		"compute.minor": computeMinor,
+	})
+	return archLabels, nil
+}
+
+// getArchFamily maps a CUDA compute capability to its family name, "undefined" if unknown. TODO: move to go-nvlib.
+func getArchFamily(computeMajor, computeMinor int) string {
+	switch {
+	case computeMajor == 1:
+		return "tesla"
+	case computeMajor == 2:
+		return "fermi"
+	case computeMajor == 3:
+		return "kepler"
+	case computeMajor == 5:
+		return "maxwell"
+	case computeMajor == 6:
+		return "pascal"
+	case computeMajor == 7 && computeMinor < 5:
+		return "volta"
+	case computeMajor == 7:
+		return "turing"
+	case computeMajor == 8 && computeMinor < 9:
+		return "ampere"
+	case computeMajor == 8:
+		return "ada-lovelace"
+	case computeMajor == 9:
+		return "hopper"
+	case computeMajor == 10, computeMajor == 12:
+		return "blackwell"
+	}
+	return "undefined"
+}
+
+// invalidLabelChars matches every character that sanitise strips; compiled once instead of on every call.
+var invalidLabelChars = regexp.MustCompile("[^A-Za-z0-9-_. ]")
+
+// sanitise removes characters outside [A-Za-z0-9-_. ] and joins the remaining whitespace-separated fields with "-".
+func sanitise(input string) string {
+	stripped := invalidLabelChars.ReplaceAllString(input, "")
+	// strings.Fields drops leading, trailing and repeated blanks before the join.
+	return strings.Join(strings.Fields(stripped), "-")
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/resource_test.go b/pkg/nvidia-plugin/pkg/lm/resource_test.go
new file mode 100644
index 000000000..24b279bea
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/resource_test.go
@@ -0,0 +1,437 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	rt "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/resource/testing"
+)
+
+// TestGPUResourceLabeler checks the labels NewGPUResourceLabeler generates without sharing, with time-slicing and with MPS.
+func TestGPUResourceLabeler(t *testing.T) {
+	device := rt.NewFullGPU()
+
+	testCases := []struct {
+		description    string
+		count          int
+		sharing        spec.Sharing
+		expectedLabels Labels
+	}{
+		{
+			description: "zero count returns empty",
+		},
+		{
+			description: "no sharing",
+			count:       1,
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "1",
+				"nvidia.com/gpu.sharing-strategy": "none",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+			},
+		},
+		{
+			description: "time-slicing ignores non-matching resource",
+			count:       1,
+			sharing: spec.Sharing{
+				TimeSlicing: spec.ReplicatedResources{
+					Resources: []spec.ReplicatedResource{
+						{
+							Name:     "nvidia.com/not-gpu",
+							Replicas: 2,
+						},
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "1",
+				"nvidia.com/gpu.sharing-strategy": "none",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+			},
+		},
+		{
+			description: "time-slicing appends suffix and doubles count",
+			count:       1,
+			sharing: spec.Sharing{
+				TimeSlicing: spec.ReplicatedResources{
+					Resources: []spec.ReplicatedResource{
+						{
+							Name:     "nvidia.com/gpu",
+							Replicas: 2,
+						},
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "2",
+				"nvidia.com/gpu.sharing-strategy": "time-slicing",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL-SHARED",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+			},
+		},
+		{
+			description: "time-slicing renamed does not append suffix and doubles count",
+			count:       1,
+			sharing: spec.Sharing{
+				TimeSlicing: spec.ReplicatedResources{
+					Resources: []spec.ReplicatedResource{
+						{
+							Name:     "nvidia.com/gpu",
+							Rename:   "nvidia.com/gpu.shared",
+							Replicas: 2,
+						},
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "2",
+				"nvidia.com/gpu.sharing-strategy": "time-slicing",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+			},
+		},
+		{
+			description: "mps ignores non-matching resource",
+			count:       1,
+			sharing: spec.Sharing{
+				MPS: &spec.ReplicatedResources{
+					Resources: []spec.ReplicatedResource{
+						{
+							Name:     "nvidia.com/not-gpu",
+							Replicas: 2,
+						},
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "1",
+				"nvidia.com/gpu.sharing-strategy": "none",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+			},
+		},
+		{
+			description: "mps appends suffix and doubles count",
+			count:       1,
+			sharing: spec.Sharing{
+				MPS: &spec.ReplicatedResources{
+					Resources: []spec.ReplicatedResource{
+						{
+							Name:     "nvidia.com/gpu",
+							Replicas: 2,
+						},
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "2",
+				"nvidia.com/gpu.sharing-strategy": "mps",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL-SHARED",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+			},
+		},
+		{
+			description: "mps renamed does not append suffix and doubles count",
+			count:       1,
+			sharing: spec.Sharing{
+				MPS: &spec.ReplicatedResources{
+					Resources: []spec.ReplicatedResource{
+						{
+							Name:     "nvidia.com/gpu",
+							Rename:   "nvidia.com/gpu.shared",
+							Replicas: 2,
+						},
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "2",
+				"nvidia.com/gpu.sharing-strategy": "mps",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL",
+				"nvidia.com/gpu.family":           "ampere",
+				"nvidia.com/gpu.compute.major":    "8",
+				"nvidia.com/gpu.compute.minor":    "0",
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			config := &spec.Config{
+				Sharing: tc.sharing,
+			}
+			l, err := NewGPUResourceLabeler(config, device, tc.count)
+			require.NoError(t, err)
+
+			labels, err := l.Labels()
+			require.NoError(t, err)
+
+			require.EqualValues(t, tc.expectedLabels, labels)
+		})
+	}
+}
+
+// TestSanitise checks that sanitise strips disallowed characters and joins the remaining fields with single dashes.
+func TestSanitise(t *testing.T) {
+	testCases := []struct {
+		input    string
+		expected string
+	}{
+		{
+			input:    "a space separated string",
+			expected: "a-space-separated-string",
+		},
+		{
+			input:    "some(thing)else",
+			expected: "somethingelse",
+		},
+		{
+			input:    "some ( thing )else",
+			expected: "some-thing-else",
+		},
+		{
+			input:    "NVIDIA-TITAN-X-(Pascal)",
+			expected: "NVIDIA-TITAN-X-Pascal",
+		},
+		{
+			input:    " input  with multiple   spaces   ",
+			expected: "input-with-multiple-spaces",
+		},
+		{
+			input:    "some [ / thing / ]else",
+			expected: "some-thing-else",
+		},
+		{
+			input:    "some / thing /else",
+			expected: "some-thing-else",
+		},
+		{
+			input:    "some-thing.else_new",
+			expected: "some-thing.else_new",
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.input, func(t *testing.T) {
+			require.EqualValues(t, tc.expected, sanitise(tc.input))
+		})
+	}
+}
+
+// TestMigResourceLabeler checks the labels NewMIGResourceLabeler generates for a MIG device under time-slicing configs.
+func TestMigResourceLabeler(t *testing.T) {
+	device := rt.NewMigDevice(1, 2, 300)
+	rt.NewMigEnabledDevice(device) // NOTE(review): result unused; presumably attaches device to a parent GPU — confirm in rt
+
+	testCases := []struct {
+		description    string
+		resourceName   spec.ResourceName
+		count          int
+		timeSlicing    spec.ReplicatedResources
+		expectedLabels Labels
+	}{
+		{
+			description: "zero count returns empty",
+		},
+		{
+			description:  "no sharing",
+			resourceName: "nvidia.com/gpu",
+			count:        1,
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "1",
+				"nvidia.com/gpu.sharing-strategy": "none",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL-MIG-1g.300gb",
+				"nvidia.com/gpu.multiprocessors":  "0",
+				"nvidia.com/gpu.slices.gi":        "1",
+				"nvidia.com/gpu.slices.ci":        "2",
+				"nvidia.com/gpu.engines.copy":     "0",
+				"nvidia.com/gpu.engines.decoder":  "0",
+				"nvidia.com/gpu.engines.encoder":  "0",
+				"nvidia.com/gpu.engines.jpeg":     "0",
+				"nvidia.com/gpu.engines.ofa":      "0",
+			},
+		},
+		{
+			description:  "shared appends suffix and doubles count",
+			resourceName: "nvidia.com/gpu",
+			count:        1,
+			timeSlicing: spec.ReplicatedResources{
+				Resources: []spec.ReplicatedResource{
+					{
+						Name:     "nvidia.com/gpu",
+						Replicas: 2,
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "2",
+				"nvidia.com/gpu.sharing-strategy": "time-slicing",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL-MIG-1g.300gb-SHARED",
+				"nvidia.com/gpu.multiprocessors":  "0",
+				"nvidia.com/gpu.slices.gi":        "1",
+				"nvidia.com/gpu.slices.ci":        "2",
+				"nvidia.com/gpu.engines.copy":     "0",
+				"nvidia.com/gpu.engines.decoder":  "0",
+				"nvidia.com/gpu.engines.encoder":  "0",
+				"nvidia.com/gpu.engines.jpeg":     "0",
+				"nvidia.com/gpu.engines.ofa":      "0",
+			},
+		},
+		{
+			description:  "renamed does not append suffix and doubles count",
+			resourceName: "nvidia.com/gpu",
+			count:        1,
+			timeSlicing: spec.ReplicatedResources{
+				Resources: []spec.ReplicatedResource{
+					{
+						Name:     "nvidia.com/gpu",
+						Rename:   "nvidia.com/gpu.shared",
+						Replicas: 2,
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/gpu.count":            "1",
+				"nvidia.com/gpu.replicas":         "2",
+				"nvidia.com/gpu.sharing-strategy": "time-slicing",
+				"nvidia.com/gpu.memory":           "300",
+				"nvidia.com/gpu.product":          "MOCKMODEL-MIG-1g.300gb",
+				"nvidia.com/gpu.multiprocessors":  "0",
+				"nvidia.com/gpu.slices.gi":        "1",
+				"nvidia.com/gpu.slices.ci":        "2",
+				"nvidia.com/gpu.engines.copy":     "0",
+				"nvidia.com/gpu.engines.decoder":  "0",
+				"nvidia.com/gpu.engines.encoder":  "0",
+				"nvidia.com/gpu.engines.jpeg":     "0",
+				"nvidia.com/gpu.engines.ofa":      "0",
+			},
+		},
+		{
+			description:  "mig mixed appends shared",
+			resourceName: "nvidia.com/mig-1g.1gb",
+			count:        1,
+			timeSlicing: spec.ReplicatedResources{
+				Resources: []spec.ReplicatedResource{
+					{
+						Name:     "nvidia.com/gpu",
+						Rename:   "nvidia.com/gpu.shared",
+						Replicas: 2,
+					},
+					{
+						Name:     "nvidia.com/mig-1g.1gb",
+						Replicas: 2,
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/mig-1g.1gb.count":            "1",
+				"nvidia.com/mig-1g.1gb.replicas":         "2",
+				"nvidia.com/mig-1g.1gb.sharing-strategy": "time-slicing",
+				"nvidia.com/mig-1g.1gb.memory":           "300",
+				"nvidia.com/mig-1g.1gb.product":          "MOCKMODEL-MIG-1g.300gb-SHARED",
+				"nvidia.com/mig-1g.1gb.multiprocessors":  "0",
+				"nvidia.com/mig-1g.1gb.slices.gi":        "1",
+				"nvidia.com/mig-1g.1gb.slices.ci":        "2",
+				"nvidia.com/mig-1g.1gb.engines.copy":     "0",
+				"nvidia.com/mig-1g.1gb.engines.decoder":  "0",
+				"nvidia.com/mig-1g.1gb.engines.encoder":  "0",
+				"nvidia.com/mig-1g.1gb.engines.jpeg":     "0",
+				"nvidia.com/mig-1g.1gb.engines.ofa":      "0",
+			},
+		},
+		{
+			description:  "mig mixed rename does not append",
+			resourceName: "nvidia.com/mig-1g.1gb",
+			count:        1,
+			timeSlicing: spec.ReplicatedResources{
+				Resources: []spec.ReplicatedResource{
+					{
+						Name:     "nvidia.com/mig-1g.1gb",
+						Rename:   "nvidia.com/mig-1g.1gb.shared",
+						Replicas: 2,
+					},
+				},
+			},
+			expectedLabels: Labels{
+				"nvidia.com/mig-1g.1gb.count":            "1",
+				"nvidia.com/mig-1g.1gb.replicas":         "2",
+				"nvidia.com/mig-1g.1gb.sharing-strategy": "time-slicing",
+				"nvidia.com/mig-1g.1gb.memory":           "300",
+				"nvidia.com/mig-1g.1gb.product":          "MOCKMODEL-MIG-1g.300gb",
+				"nvidia.com/mig-1g.1gb.multiprocessors":  "0",
+				"nvidia.com/mig-1g.1gb.slices.gi":        "1",
+				"nvidia.com/mig-1g.1gb.slices.ci":        "2",
+				"nvidia.com/mig-1g.1gb.engines.copy":     "0",
+				"nvidia.com/mig-1g.1gb.engines.decoder":  "0",
+				"nvidia.com/mig-1g.1gb.engines.encoder":  "0",
+				"nvidia.com/mig-1g.1gb.engines.jpeg":     "0",
+				"nvidia.com/mig-1g.1gb.engines.ofa":      "0",
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			config := &spec.Config{
+				Sharing: spec.Sharing{
+					TimeSlicing: tc.timeSlicing,
+				},
+			}
+			l, err := NewMIGResourceLabeler(tc.resourceName, config, device, tc.count)
+			require.NoError(t, err)
+
+			labels, err := l.Labels()
+			require.NoError(t, err)
+
+			require.EqualValues(t, tc.expectedLabels, labels)
+		})
+	}
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/strategy.go b/pkg/nvidia-plugin/pkg/lm/strategy.go
new file mode 100644
index 000000000..170adc336
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/strategy.go
@@ -0,0 +1,28 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+// migStrategyLabeler creates a labeler for setting the mig strategy label.
+// The empty labeler is returned for MigStrategyNone.
+func migStrategyLabeler(strategy string) Labeler {
+	switch strategy {
+	case MigStrategyNone:
+		return empty{}
+	default:
+		return Labels{"nvidia.com/mig.strategy": strategy}
+	}
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/timestamp.go b/pkg/nvidia-plugin/pkg/lm/timestamp.go
new file mode 100644
index 000000000..9ea1d0e1e
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/timestamp.go
@@ -0,0 +1,37 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+import (
+	"fmt"
+	"time"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+)
+
+// NewTimestampLabeler returns a labeler for the nvidia.com/gfd.timestamp label,
+// whose value is the current Unix time in seconds. If the noTimestamp option is
+// set in config the empty labeler is returned; the flag pointer is dereferenced as is.
+func NewTimestampLabeler(config *spec.Config) Labeler {
+	if noTimestamp := *config.Flags.GFD.NoTimestamp; noTimestamp {
+		return empty{}
+	}
+	now := time.Now().Unix()
+	return Labels{
+		"nvidia.com/gfd.timestamp": fmt.Sprintf("%d", now),
+	}
+}
diff --git a/pkg/nvidia-plugin/pkg/lm/vgpu.go b/pkg/nvidia-plugin/pkg/lm/vgpu.go
new file mode 100644
index 000000000..6c61169b7
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/lm/vgpu.go
@@ -0,0 +1,58 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lm
+
+import (
+	"fmt"
+	"strconv"
+
+	"k8s.io/klog/v2"
+
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/vgpu"
+)
+
+// vgpuLabeler manages the vGPU labels for the node
+type vgpuLabeler struct {
+	lib vgpu.Interface
+}
+
+// NewVGPULabeler creates a new vGPU label manager using the provided vgpu
+// library.
+func NewVGPULabeler(vgpu vgpu.Interface) Labeler {
+	return vgpuLabeler{lib: vgpu}
+}
+
+// Labels generates the vGPU labels for the node. A failure to list the devices is
+// logged and yields (nil, nil); a failure to read a device's info is returned.
+func (manager vgpuLabeler) Labels() (Labels, error) {
+	vgpuDevices, err := manager.lib.Devices()
+	if err != nil {
+		klog.ErrorS(err, "unable to get vGPU devices")
+		return nil, nil
+	}
+	result := Labels{"nvidia.com/vgpu.present": strconv.FormatBool(len(vgpuDevices) > 0)}
+	// Every device writes the same two keys, so the last device's values win.
+	for _, d := range vgpuDevices {
+		details, infoErr := d.GetInfo()
+		if infoErr != nil {
+			return nil, fmt.Errorf("error getting vGPU device info: %v", infoErr)
+		}
+		result["nvidia.com/vgpu.host-driver-version"] = details.HostDriverVersion
+		result["nvidia.com/vgpu.host-driver-branch"] = details.HostDriverBranch
+	}
+	return result, nil
+}
diff --git a/pkg/nvidia-plugin/pkg/logger/klog.go b/pkg/nvidia-plugin/pkg/logger/klog.go
new file mode 100644
index 000000000..5cbfba7d0
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/logger/klog.go
@@ -0,0 +1,34 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package logger
+
+import "k8s.io/klog/v2"
+
+type toKlog struct{} // toKlog forwards Warning and Warningf calls to klog.
+
+// ToKlog is a ready-to-use *toKlog that can be passed to functions that need such a logger.
+var ToKlog = &toKlog{}
+
+// Warning forwards the arguments unchanged to the klog.Warning function.
+func (l toKlog) Warning(args ...interface{}) {
+	klog.Warning(args...)
+}
+
+// Warningf forwards the format and arguments unchanged to the klog.Warningf function.
+func (l toKlog) Warningf(format string, args ...interface{}) {
+	klog.Warningf(format, args...)
+}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/mig/mig.go b/pkg/nvidia-plugin/pkg/mig/mig-dp.go
similarity index 70%
rename from pkg/device-plugin/nvidiadevice/nvinternal/mig/mig.go
rename to pkg/nvidia-plugin/pkg/mig/mig-dp.go
index cc38b4d25..f3121d975 100644
--- a/pkg/device-plugin/nvidiadevice/nvinternal/mig/mig.go
+++ b/pkg/nvidia-plugin/pkg/mig/mig-dp.go
@@ -1,34 +1,4 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
+// Copyright (c) 2021 - 2022, NVIDIA CORPORATION. All rights reserved.
 
 package mig
 
@@ -49,7 +19,7 @@ const (
 	nvcapsDevicePath     = "/dev/nvidia-caps"
 )
 
-// GetMigCapabilityDevicePaths returns a mapping of MIG capability path to device node path.
+// GetMigCapabilityDevicePaths returns a mapping of MIG capability path to device node path
 func GetMigCapabilityDevicePaths() (map[string]string, error) {
 	// Open nvcapsMigMinorsPath for walking.
 	// If the nvcapsMigMinorsPath does not exist, then we are not on a MIG
diff --git a/pkg/nvidia-plugin/pkg/mig/mig.go b/pkg/nvidia-plugin/pkg/mig/mig.go
new file mode 100644
index 000000000..0d5754209
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/mig/mig.go
@@ -0,0 +1,124 @@
+/**
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package mig
+
+import (
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/resource"
+)
+
+// DeviceInfo stores information about all devices on the node
+type DeviceInfo struct {
+	// manager is the resource manager that GetDevicesMap queries for the devices
+	manager resource.Manager
+	// devicesMap holds the devices, keyed by whether they have MIG enabled; nil until first use
+	devicesMap map[bool][]resource.Device
+}
+
+// NewDeviceInfo returns a pointer to a new DeviceInfo bound to the given manager.
+// The devices map is not populated here.
+func NewDeviceInfo(manager resource.Manager) *DeviceInfo {
+	info := DeviceInfo{manager: manager}
+	// devicesMap keeps its nil zero value; it is initialized on first use.
+	return &info
+}
+
+// GetDevicesMap returns the devices grouped by whether they have MIG enabled
+// (key true) or not (key false). The map is built on the first successful call
+// and the cached map is returned by later calls.
+func (di *DeviceInfo) GetDevicesMap() (map[bool][]resource.Device, error) {
+	if di.devicesMap == nil {
+		// First use: enumerate the devices and group them by MIG mode.
+		all, err := di.manager.GetDevices()
+		if err != nil {
+			return nil, err
+		}
+
+		grouped := make(map[bool][]resource.Device)
+		for _, dev := range all {
+			enabled, err := dev.IsMigEnabled()
+			if err != nil {
+				return nil, err
+			}
+			grouped[enabled] = append(grouped[enabled], dev)
+		}
+
+		// Cache only after every device was classified without error.
+		di.devicesMap = grouped
+	}
+
+	return di.devicesMap, nil
+}
+
+// GetDevicesWithMigEnabled returns the devices stored under migEnabled=true.
+func (di *DeviceInfo) GetDevicesWithMigEnabled() ([]resource.Device, error) {
+	byMigMode, lookupErr := di.GetDevicesMap()
+	if lookupErr != nil {
+		return nil, lookupErr
+	}
+	return byMigMode[true], nil
+}
+
+// GetDevicesWithMigDisabled returns the devices stored under migEnabled=false.
+func (di *DeviceInfo) GetDevicesWithMigDisabled() ([]resource.Device, error) {
+	byMigMode, lookupErr := di.GetDevicesMap()
+	if lookupErr != nil {
+		return nil, lookupErr
+	}
+	return byMigMode[false], nil
+}
+
+// AnyMigEnabledDeviceIsEmpty reports whether at least one MIG-enabled device has
+// no MIG devices configured. It also reports true when no device has MIG enabled.
+func (di *DeviceInfo) AnyMigEnabledDeviceIsEmpty() (bool, error) {
+	byMigMode, err := di.GetDevicesMap()
+	if err != nil {
+		return false, err
+	}
+	migEnabled := byMigMode[true]
+	if len(migEnabled) == 0 {
+		// By definition the property is true for the empty set
+		return true, nil
+	}
+	for _, parent := range migEnabled {
+		children, err := parent.GetMigDevices()
+		switch {
+		case err != nil:
+			return false, err
+		case len(children) == 0:
+			return true, nil
+		}
+	}
+	return false, nil
+}
+
+// GetAllMigDevices returns the MIG devices of every MIG-enabled device,
+// concatenated in the order the parent devices are stored.
+func (di *DeviceInfo) GetAllMigDevices() ([]resource.Device, error) {
+	byMigMode, err := di.GetDevicesMap()
+	if err != nil {
+		return nil, err
+	}
+	var all []resource.Device
+	for _, parent := range byMigMode[true] {
+		children, err := parent.GetMigDevices()
+		if err != nil {
+			return nil, err
+		}
+		all = append(all, children...)
+	}
+	return all, nil
+}
diff --git a/pkg/nvidia-plugin/pkg/plugin/api.go b/pkg/nvidia-plugin/pkg/plugin/api.go
new file mode 100644
index 000000000..ce37316f9
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/plugin/api.go
@@ -0,0 +1,26 @@
+/**
+# Copyright (c) NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package plugin
+
+import "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
+
+// Interface defines the API for the plugin package
+type Interface interface {
+	Devices() rm.Devices // the devices associated with the plugin
+	Start(string) error  // NvidiaDevicePlugin's implementation takes the kubelet socket as the argument
+	Stop() error         // NvidiaDevicePlugin's implementation stops the gRPC server
+}
diff --git a/pkg/nvidia-plugin/pkg/plugin/factory.go b/pkg/nvidia-plugin/pkg/plugin/factory.go
new file mode 100644
index 000000000..cd9952d11
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/plugin/factory.go
@@ -0,0 +1,138 @@
+/**
+# Copyright (c) NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package plugin
+
+import (
+	"fmt"
+
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+	"k8s.io/klog/v2"
+
+	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/cdi"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/imex"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
+)
+
+type options struct {
+	infolib   info.Interface
+	nvmllib   nvml.Interface
+	devicelib device.Interface
+
+	failOnInitError bool // when true, initialization failures are returned as errors instead of being tolerated
+
+	cdiHandler cdi.Interface        // replaced by the null CDI handler in New when unset
+	config     *nvidia.DeviceConfig // New returns no plugins when this is nil
+
+	deviceListStrategies spec.DeviceListStrategies
+
+	imexChannels imex.Channels
+}
+
+// New creates a new set of plugins with the supplied options: one plugin is
+// built for each resource manager. It returns (nil, nil) when no config option
+// was supplied.
+func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, opts ...Option) ([]Interface, error) {
+	o := &options{infolib: infolib, nvmllib: nvmllib, devicelib: devicelib}
+	for _, apply := range opts {
+		apply(o)
+	}
+
+	if o.config == nil {
+		klog.Warning("no config provided, returning a null manager")
+		return nil, nil
+	}
+
+	// Fall back to the null CDI handler when none was configured.
+	if o.cdiHandler == nil {
+		o.cdiHandler = cdi.NewNullHandler()
+	}
+
+	managers, err := o.getResourceManagers()
+	if err != nil {
+		return nil, fmt.Errorf("failed to construct resource managers: %w", err)
+	}
+
+	// Build one plugin per resource manager; the first failure aborts.
+	var result []Interface
+	for _, manager := range managers {
+		p, err := o.devicePluginForResource(manager)
+		if err != nil {
+			return nil, fmt.Errorf("failed to create plugin: %w", err)
+		}
+		result = append(result, p)
+	}
+	return result, nil
+}
+
+// getResourceManagers constructs a set of resource managers for the resolved
+// discovery strategy ("nvml" or "tegra"). Unless failOnInitError is set, an NVML
+// init failure or an unsupported strategy is logged and yields (nil, nil).
+func (o *options) getResourceManagers() ([]rm.ResourceManager, error) {
+	strategy := o.resolveStrategy(*o.config.Flags.DeviceDiscoveryStrategy)
+	switch strategy {
+	case "nvml":
+		ret := o.nvmllib.Init()
+		if ret != nvml.SUCCESS {
+			klog.Errorf("Failed to initialize NVML: %v.", ret)
+			klog.Errorf("If this is a GPU node, did you set the docker default runtime to `nvidia`?")
+			klog.Errorf("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites")
+			klog.Errorf("You can learn how to set the runtime at: https://github.com/NVIDIA/k8s-device-plugin#quick-start")
+			klog.Errorf("If this is not a GPU node, you should set up a toleration or nodeSelector to only deploy this plugin on GPU nodes")
+			if o.failOnInitError {
+				return nil, fmt.Errorf("nvml init failed: %v", ret)
+			}
+			klog.Warningf("nvml init failed: %v", ret)
+			return nil, nil
+		}
+		defer func() {
+			_ = o.nvmllib.Shutdown()
+		}()
+
+		return rm.NewNVMLResourceManagers(o.infolib, o.nvmllib, o.devicelib, o.config)
+	case "tegra":
+		return rm.NewTegraResourceManagers(o.config)
+	default:
+		klog.Errorf("Incompatible strategy detected %v", strategy)
+		klog.Error("If this is a GPU node, did you configure the NVIDIA Container Toolkit?")
+		klog.Error("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites")
+		klog.Error("You can learn how to set the runtime at: https://github.com/NVIDIA/k8s-device-plugin#quick-start")
+		klog.Error("If this is not a GPU node, you should set up a toleration or nodeSelector to only deploy this plugin on GPU nodes")
+		if o.failOnInitError {
+			return nil, fmt.Errorf("invalid device discovery strategy")
+		}
+		return nil, nil
+	}
+}
+
+// resolveStrategy returns the device discovery strategy to use. An explicit
+// strategy is returned unchanged; "" and "auto" are mapped to "nvml" or "tegra"
+// from the resolved platform, and returned unchanged for any other platform.
+func (o *options) resolveStrategy(strategy string) string {
+	if strategy == "" || strategy == "auto" {
+		switch o.infolib.ResolvePlatform() {
+		case info.PlatformNVML, info.PlatformWSL:
+			return "nvml"
+		case info.PlatformTegra:
+			return "tegra"
+		}
+	}
+	return strategy
+}
diff --git a/pkg/nvidia-plugin/pkg/plugin/mps.go b/pkg/nvidia-plugin/pkg/plugin/mps.go
new file mode 100644
index 000000000..c4b304f07
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/plugin/mps.go
@@ -0,0 +1,91 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package plugin
+
+import (
+	"errors"
+	"fmt"
+
+	"k8s.io/klog/v2"
+	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/mps-control-daemon/mps"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
+)
+
+type mpsOptions struct {
+	enabled      bool              // when false, waitForDaemon and updateReponse do nothing
+	resourceName spec.ResourceName // Resource() of the resource manager the options were built for
+	daemon       *mps.Daemon       // created by mps.NewDaemon with mps.ContainerRoot
+	hostRoot     mps.Root          // built from the MpsRoot command line flag
+}
+
+// getMPSOptions returns the MPS options specified for the resource manager.
+// If MPS is not configured an empty set of options is returned.
+func (o *options) getMPSOptions(resourceManager rm.ResourceManager) (mpsOptions, error) {
+	var none mpsOptions
+	if o.config.Sharing.SharingStrategy() != spec.SharingStrategyMPS {
+		return none, nil
+	}
+
+	// TODO: It might make sense to pull this logic into a resource manager.
+	for _, d := range resourceManager.Devices() {
+		if d.IsMigDevice() {
+			return none, errors.New("sharing using MPS is not supported for MIG devices")
+		}
+	}
+
+	return mpsOptions{
+		enabled:      true,
+		resourceName: resourceManager.Resource(),
+		daemon:       mps.NewDaemon(resourceManager, mps.ContainerRoot),
+		hostRoot:     mps.Root(*o.config.Flags.CommandLineFlags.MpsRoot),
+	}, nil
+}
+
+// waitForDaemon asserts that the MPS daemon is healthy; it is a no-op for a nil or disabled receiver.
+func (m *mpsOptions) waitForDaemon() error {
+	if m == nil || !m.enabled {
+		return nil
+	}
+	// TODO: Check the .ready file here and have some retry strategy.
+	if err := m.daemon.AssertHealthy(); err != nil {
+		return fmt.Errorf("error checking MPS daemon health: %w", err)
+	}
+	klog.InfoS("MPS daemon is healthy", "resource", m.resourceName)
+	return nil
+}
+
+// updateReponse adds the MPS pipe directory environment variable and the MPS pipe
+// and shm directory mounts to response; a nil or disabled receiver is a no-op.
+func (m *mpsOptions) updateReponse(response *pluginapi.ContainerAllocateResponse) {
+	if m == nil || !m.enabled {
+		return
+	}
+	// Writing to a nil map panics, so make sure Envs exists first.
+	if response.Envs == nil {
+		response.Envs = make(map[string]string)
+	}
+	// TODO: We should check that the deviceIDs are shared using MPS.
+	response.Envs["CUDA_MPS_PIPE_DIRECTORY"] = m.daemon.PipeDir()
+
+	response.Mounts = append(response.Mounts,
+		&pluginapi.Mount{ContainerPath: m.daemon.PipeDir(), HostPath: m.hostRoot.PipeDir(m.resourceName)},
+		&pluginapi.Mount{ContainerPath: m.daemon.ShmDir(), HostPath: m.hostRoot.ShmDir(m.resourceName)},
+	)
+}
diff --git a/pkg/nvidia-plugin/pkg/plugin/options.go b/pkg/nvidia-plugin/pkg/plugin/options.go
new file mode 100644
index 000000000..7e3799248
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/plugin/options.go
@@ -0,0 +1,79 @@
+/**
+# Copyright (c) NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package plugin
+
+import (
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+
+	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/cdi"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/imex"
+)
+
+// Option is a function that configures the plugin options.
+type Option func(*options)
+
+// WithCDIHandler sets the CDI handler for the options.
+func WithCDIHandler(handler cdi.Interface) Option {
+	return func(m *options) {
+		m.cdiHandler = handler
+	}
+}
+
+// WithDeviceListStrategies sets the device list strategies for the options.
+func WithDeviceListStrategies(deviceListStrategies spec.DeviceListStrategies) Option {
+	return func(m *options) {
+		m.deviceListStrategies = deviceListStrategies
+	}
+}
+
+// WithNVML sets the NVML library for the options.
+func WithNVML(nvmllib nvml.Interface) Option {
+	return func(m *options) {
+		m.nvmllib = nvmllib
+	}
+}
+
+// WithInfoLib sets the info library for the options.
+func WithInfoLib(infolib info.Interface) Option {
+	return func(m *options) {
+		m.infolib = infolib
+	}
+}
+
+// WithFailOnInitError sets whether initialization errors are reported as failures.
+func WithFailOnInitError(failOnInitError bool) Option {
+	return func(m *options) {
+		m.failOnInitError = failOnInitError
+	}
+}
+
+// WithConfig sets the config reference for the options.
+func WithConfig(config *nvidia.DeviceConfig) Option {
+	return func(m *options) {
+		m.config = config
+	}
+}
+
+// WithImexChannels sets the imex channels for the options.
+func WithImexChannels(imexChannels imex.Channels) Option {
+	return func(m *options) {
+		m.imexChannels = imexChannels
+	}
+}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/register.go b/pkg/nvidia-plugin/pkg/plugin/register.go
similarity index 98%
rename from pkg/device-plugin/nvidiadevice/nvinternal/plugin/register.go
rename to pkg/nvidia-plugin/pkg/plugin/register.go
index 0da4db014..c78141ea9 100644
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/register.go
+++ b/pkg/nvidia-plugin/pkg/plugin/register.go
@@ -94,7 +94,7 @@ func parseNvidiaNumaInfo(idx int, nvidiaTopoStr string) (int, error) {
 		}
 		klog.V(5).InfoS("nvidia-smi topo -m row output", "row output", words, "length", len(words))
 		if strings.Contains(words[0], fmt.Sprint(idx)) {
-			if words[numaAffinityColumnIndex] == "N/A" {
+			if len(words) <= numaAffinityColumnIndex || words[numaAffinityColumnIndex] == "N/A" {
 				klog.InfoS("current card has not established numa topology", "gpu row info", words, "index", idx)
 				return 0, nil
 			}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/register_test.go b/pkg/nvidia-plugin/pkg/plugin/register_test.go
similarity index 91%
rename from pkg/device-plugin/nvidiadevice/nvinternal/plugin/register_test.go
rename to pkg/nvidia-plugin/pkg/plugin/register_test.go
index 5c1530878..ab12dcd40 100644
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/register_test.go
+++ b/pkg/nvidia-plugin/pkg/plugin/register_test.go
@@ -47,7 +47,7 @@ func Test_parseNvidiaNumaInfo(t *testing.T) {
 			name: "single Tesla P4 NUMA",
 			idx:  0,
 			nvidiaTopoStr: `GPU0    CPU Affinity    NUMA Affinity ...
-                            ...`,
+							 ...`,
 			want:    0,
 			wantErr: false,
 		},
@@ -55,7 +55,7 @@ func Test_parseNvidiaNumaInfo(t *testing.T) {
 			name: "two Tesla P4 NUMA topo with index 0",
 			idx:  0,
 			nvidiaTopoStr: `GPU0    GPU1    CPU Affinity    NUMA Affinity ...
-                            ...`,
+							 ...`,
 			want:    0,
 			wantErr: false,
 		},
@@ -63,7 +63,15 @@ func Test_parseNvidiaNumaInfo(t *testing.T) {
 			name: "two Tesla P4 NUMA topo with index 1",
 			idx:  1,
 			nvidiaTopoStr: `GPU0    GPU1    CPU Affinity    NUMA Affinity ...
-                            ...`,
+							 ...`,
+			want:    0,
+			wantErr: false,
+		},
+		{
+			name: "NUMA Affinity is empty",
+			idx:  0,
+			nvidiaTopoStr: `GPU0	CPU Affinity	NUMA Affinity	GPU NUMA ID
+ GPU0	X`,
 			want:    0,
 			wantErr: false,
 		},
diff --git a/pkg/nvidia-plugin/pkg/plugin/server.go b/pkg/nvidia-plugin/pkg/plugin/server.go
new file mode 100644
index 000000000..37643f21a
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/plugin/server.go
@@ -0,0 +1,742 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package plugin
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net"
+	"os"
+	"os/exec"
+	"path"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/google/uuid"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+	"k8s.io/apimachinery/pkg/util/yaml"
+	"k8s.io/klog/v2"
+	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+	cdiapi "tags.cncf.io/container-device-interface/pkg/cdi"
+
+	"github.com/Project-HAMi/HAMi/pkg/device"
+	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/cdi"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/imex"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
+	"github.com/Project-HAMi/HAMi/pkg/util"
+)
+
+const (
+	deviceListEnvVar                          = "NVIDIA_VISIBLE_DEVICES"
+	deviceListAsVolumeMountsHostPath          = "/dev/null"
+	deviceListAsVolumeMountsContainerPathRoot = "/var/run/nvidia-container-devices"
+	NodeLockNvidia                            = "hami.io/mutex.lock"
+)
+
+var (
+	hostHookPath string  // set from the HOOK_PATH environment variable in init
+	ConfigFile   *string // dereferenced by LoadNvidiaDevicePluginConfig as the device config path; must be set before it runs
+)
+
+func init() {
+	hostHookPath = os.Getenv("HOOK_PATH") // empty when the variable is unset
+}
+
+// NvidiaDevicePlugin implements the Kubernetes device plugin API
+type NvidiaDevicePlugin struct {
+	rm                   rm.ResourceManager
+	config               *nvidia.DeviceConfig
+	deviceListStrategies spec.DeviceListStrategies
+
+	cdiHandler          cdi.Interface
+	cdiAnnotationPrefix string
+
+	socket string           // path of the unix socket the plugin listens on
+	server *grpc.Server     // created by initialize; nil while the plugin is not running
+	health chan *rm.Device  // created by initialize and handed to the health check
+	stop   chan interface{} // created by initialize and closed by cleanup
+
+	imexChannels imex.Channels
+
+	mps mpsOptions
+
+	operatingMode   string               // mode from the node config; "mig" triggers the nvidia-mig-parted export in Start
+	migCurrent      nvidia.MigPartedSpec // filled in Start from the nvidia-mig-parted export output
+	schedulerConfig nvidia.NvidiaConfig  // NvidiaConfig section of the loaded device config
+}
+
+// devicePluginForResource creates a device plugin for the specified resource.
+// Configuration or device initialization failures are returned to the caller.
+func (o *options) devicePluginForResource(resourceManager rm.ResourceManager) (Interface, error) {
+	mpsOpts, err := o.getMPSOptions(resourceManager)
+	if err != nil {
+		return nil, err
+	}
+	sConfig, mode, err := LoadNvidiaDevicePluginConfig()
+	if err != nil {
+		return nil, fmt.Errorf("failed to load nvidia plugin config: %w", err)
+	}
+
+	// Initialize devices with configuration; a failure is returned rather than exiting the process.
+	if err := device.InitDevicesWithConfig(sConfig); err != nil {
+		return nil, fmt.Errorf("failed to initialize devices: %w", err)
+	}
+
+	plugin := NvidiaDevicePlugin{
+		rm:                   resourceManager,
+		config:               o.config,
+		deviceListStrategies: o.deviceListStrategies,
+
+		cdiHandler:          o.cdiHandler,
+		cdiAnnotationPrefix: *o.config.Flags.Plugin.CDIAnnotationPrefix,
+
+		imexChannels: o.imexChannels,
+
+		mps: mpsOpts,
+
+		socket: getPluginSocketPath(resourceManager.Resource()),
+		// These will be reinitialized every time the plugin server is restarted.
+		server: nil,
+		health: nil,
+		stop:   nil,
+
+		// initialize the HAMi fields
+		operatingMode:   mode,
+		schedulerConfig: sConfig.NvidiaConfig,
+		migCurrent:      nvidia.MigPartedSpec{},
+	}
+	return &plugin, nil
+}
+
+// readFromConfigFile applies this node's overrides from /config/config.json to sConfig and returns the operating mode (default "hami-core").
+func readFromConfigFile(sConfig *nvidia.NvidiaConfig) (string, error) {
+	raw, err := os.ReadFile("/config/config.json")
+	if err != nil {
+		return "", err
+	}
+	var parsed nvidia.DevicePluginConfigs
+	if err := json.Unmarshal(raw, &parsed); err != nil {
+		return "", err
+	}
+	klog.Infof("Device Plugin Configs: %v", fmt.Sprintf("%v", parsed))
+	mode := "hami-core"
+	for _, node := range parsed.Nodeconfig {
+		if os.Getenv(util.NodeNameEnvName) == node.Name {
+			klog.Infof("Reading config from file %s", node.Name)
+			if node.Devicememoryscaling > 0 {
+				sConfig.DeviceMemoryScaling = node.Devicememoryscaling
+			}
+			if node.Devicecorescaling > 0 {
+				sConfig.DeviceCoreScaling = node.Devicecorescaling
+			}
+			if node.Devicesplitcount > 0 {
+				sConfig.DeviceSplitCount = node.Devicesplitcount
+			}
+			if node.FilterDevice != nil && (len(node.FilterDevice.UUID) > 0 || len(node.FilterDevice.Index) > 0) {
+				nvidia.DevicePluginFilterDevice = node.FilterDevice
+			}
+			if len(node.OperatingMode) > 0 {
+				mode = node.OperatingMode
+			}
+			klog.Infof("FilterDevice: %v", node.FilterDevice)
+		}
+	}
+	return mode, nil
+}
+
+func LoadNvidiaDevicePluginConfig() (*device.Config, string, error) {
+	sConfig, err := device.LoadConfig(*ConfigFile) // ConfigFile must be non-nil at this point
+	if err != nil { // report the failure to the caller instead of exiting the process here
+		return nil, "", fmt.Errorf("failed to load device config file %s: %w", *ConfigFile, err)
+	}
+	mode, err := readFromConfigFile(&sConfig.NvidiaConfig)
+	if err != nil {
+		klog.Errorf("readFromConfigFile err:%s", err.Error()) // best effort: only logged, mode stays ""
+	}
+	return sConfig, mode, nil
+}
+
+// getPluginSocketPath returns the socket to use for the specified resource.
+func getPluginSocketPath(resource spec.ResourceName) string {
+	_, shortName := resource.Split()
+	socketBase := filepath.Join(pluginapi.DevicePluginPath, "nvidia-"+shortName)
+	return socketBase + ".sock"
+}
+
+func (plugin *NvidiaDevicePlugin) initialize() {
+	plugin.health = make(chan *rm.Device)
+	plugin.stop = make(chan interface{})
+	plugin.server = grpc.NewServer() // no server options are set
+}
+
+// cleanup closes the stop channel and then resets the server, health and stop
+// fields to nil.
+func (plugin *NvidiaDevicePlugin) cleanup() {
+	close(plugin.stop)
+	plugin.server, plugin.health, plugin.stop = nil, nil, nil
+}
+
+// Devices returns the full set of devices reported by the plugin's resource manager.
+func (plugin *NvidiaDevicePlugin) Devices() rm.Devices {
+	return plugin.rm.Devices()
+}
+
+// Start starts the gRPC server, registers the device plugin with the Kubelet,
+// and starts the device healthchecks. In "mig" operating mode it also records
+// the current MIG layout reported by `nvidia-mig-parted export`.
+func (plugin *NvidiaDevicePlugin) Start(kubeletSocket string) error {
+	plugin.initialize()
+
+	if err := plugin.mps.waitForDaemon(); err != nil {
+		return fmt.Errorf("error waiting for MPS daemon: %w", err)
+	}
+
+	err := plugin.Serve()
+	if err != nil {
+		klog.Errorf("Could not start device plugin for '%s': %s", plugin.rm.Resource(), err)
+		plugin.cleanup()
+		return err
+	}
+	klog.Infof("Starting to serve '%s' on %s", plugin.rm.Resource(), plugin.socket)
+
+	err = plugin.Register(kubeletSocket)
+	if err != nil {
+		klog.Errorf("Could not register device plugin: %s", err)
+		return errors.Join(err, plugin.Stop())
+	}
+	klog.Infof("Registered device plugin for '%s' with Kubelet", plugin.rm.Resource())
+
+	if plugin.operatingMode == "mig" {
+		cmd := exec.Command("nvidia-mig-parted", "export")
+		var stdout, stderr bytes.Buffer
+		cmd.Stdout = &stdout
+		cmd.Stderr = &stderr
+		if err := cmd.Run(); err != nil {
+			klog.Fatalf("nvidia-mig-parted failed with %s\n", err)
+		}
+		outStr := stdout.Bytes()
+		if err := yaml.Unmarshal(outStr, &plugin.migCurrent); err != nil {
+			return errors.Join(fmt.Errorf("parsing nvidia-mig-parted export output: %w", err), plugin.Stop())
+		}
+		if err := os.WriteFile("/tmp/migconfig.yaml", outStr, os.ModePerm); err != nil {
+			klog.Errorf("Could not write /tmp/migconfig.yaml: %v", err)
+		}
+		if current := plugin.migCurrent.MigConfigs["current"]; len(current) == 1 && len(current[0].Devices) == 0 {
+			current[0].Devices = make([]int32, 0)
+			for idx := 0; idx < GetDeviceNums(); idx++ {
+				current[0].Devices = append(current[0].Devices, int32(idx))
+			}
+		}
+		klog.Infoln("Mig export", plugin.migCurrent)
+	}
+
+	go func() {
+		// TODO: add MPS health check
+		err := plugin.rm.CheckHealth(plugin.stop, plugin.health)
+		if err != nil {
+			klog.Errorf("Failed to start health check: %v; continuing with health checks disabled", err)
+		}
+	}()
+
+	go plugin.WatchAndRegister()
+
+	return nil
+}
+
+// Stop stops the gRPC server, removes the plugin socket and releases the plugin's channels.
+func (plugin *NvidiaDevicePlugin) Stop() error {
+	if plugin == nil || plugin.server == nil {
+		return nil
+	}
+	klog.Infof("Stopping to serve '%s' on %s", plugin.rm.Resource(), plugin.socket)
+	plugin.server.Stop()
+	defer plugin.cleanup() // always runs, so the stop channel is closed even when the socket cannot be removed
+	if err := os.Remove(plugin.socket); err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	return nil
+}
+
+// Serve listens on the plugin socket, runs the gRPC server in a background goroutine, and returns once a test connection succeeds.
+func (plugin *NvidiaDevicePlugin) Serve() error {
+	os.Remove(plugin.socket)
+	sock, err := net.Listen("unix", plugin.socket)
+	if err != nil {
+		return err
+	}
+
+	pluginapi.RegisterDevicePluginServer(plugin.server, plugin)
+
+	go func() {
+		lastCrashTime := time.Now()
+		restartCount := 0
+
+		for {
+			// Quit if the server has been restarted too often, i.e. it has crashed
+			// more than 5 times with less than one hour between consecutive crashes.
+			if restartCount > 5 {
+				// quit
+				klog.Fatalf("GRPC server for '%s' has repeatedly crashed recently. Quitting", plugin.rm.Resource())
+			}
+
+			klog.Infof("Starting GRPC server for '%s'", plugin.rm.Resource())
+			err := plugin.server.Serve(sock) // NOTE(review): grpc closes sock when Serve returns, so a retry on it may fail at once — confirm
+			if err == nil {
+				break
+			}
+
+			klog.Infof("GRPC server for '%s' crashed with error: %v", plugin.rm.Resource(), err)
+
+			timeSinceLastCrash := time.Since(lastCrashTime).Seconds()
+			lastCrashTime = time.Now()
+			if timeSinceLastCrash > 3600 {
+				// More than one hour has passed since the previous crash, so the
+				// counter is reset to keep it a measure of crash frequency.
+				restartCount = 0
+			} else {
+				restartCount++
+			}
+		}
+	}()
+
+	// Wait for the server to come up by opening (and closing) a blocking connection to it.
+	conn, err := plugin.dial(plugin.socket, 5*time.Second)
+	if err != nil {
+		return err
+	}
+	conn.Close()
+
+	return nil
+}
+
+// Register announces this plugin's socket and resource name to the Kubelet
+// registration service reachable at kubeletSocket. An empty kubeletSocket
+// skips registration and reports success.
+func (plugin *NvidiaDevicePlugin) Register(kubeletSocket string) error {
+	if kubeletSocket == "" {
+		klog.Info("Skipping registration with Kubelet")
+		return nil
+	}
+
+	kubeletConn, dialErr := plugin.dial(kubeletSocket, 5*time.Second)
+	if dialErr != nil {
+		return dialErr
+	}
+	defer kubeletConn.Close()
+
+	// Only the base name of the plugin socket is sent as the endpoint, and
+	// GetPreferredAllocation is advertised as available.
+	request := &pluginapi.RegisterRequest{
+		Version:      pluginapi.Version,
+		Endpoint:     path.Base(plugin.socket),
+		ResourceName: string(plugin.rm.Resource()),
+		Options: &pluginapi.DevicePluginOptions{
+			GetPreferredAllocationAvailable: true,
+		},
+	}
+
+	_, registerErr := pluginapi.NewRegistrationClient(kubeletConn).Register(context.Background(), request)
+	return registerErr
+}
+
+// GetDevicePluginOptions reports the optional features of this plugin; only
+// GetPreferredAllocation is advertised.
+func (plugin *NvidiaDevicePlugin) GetDevicePluginOptions(context.Context, *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) {
+	return &pluginapi.DevicePluginOptions{
+		GetPreferredAllocationAvailable: true,
+	}, nil
+}
+
+// ListAndWatch sends the current device list, then re-sends it each time a device is reported unhealthy.
+func (plugin *NvidiaDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error {
+	if err := s.Send(&pluginapi.ListAndWatchResponse{Devices: plugin.apiDevices()}); err != nil {
+		return err
+	}
+
+	for {
+		select {
+		case <-plugin.stop:
+			return nil
+		case d := <-plugin.health:
+			// FIXME: there is no way to recover from the Unhealthy state.
+			d.Health = pluginapi.Unhealthy
+			klog.Infof("'%s' device marked unhealthy: %s", plugin.rm.Resource(), d.ID)
+			if err := s.Send(&pluginapi.ListAndWatchResponse{Devices: plugin.apiDevices()}); err != nil {
+				return err // report the failed send instead of ending the stream as a success
+			}
+		}
+	}
+}
+
+// GetPreferredAllocation returns the preferred allocation from the set of devices specified in the request
+func (plugin *NvidiaDevicePlugin) GetPreferredAllocation(ctx context.Context, r *pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error) {
+	response := &pluginapi.PreferredAllocationResponse{}
+	for _, req := range r.ContainerRequests {
+		devices, err := plugin.rm.GetPreferredAllocation(req.AvailableDeviceIDs, req.MustIncludeDeviceIDs, int(req.AllocationSize))
+		if err != nil {
+			return nil, fmt.Errorf("error getting list of preferred allocation devices: %v", err)
+		}
+
+		resp := &pluginapi.ContainerPreferredAllocationResponse{
+			DeviceIDs: devices,
+		}
+
+		response.ContainerResponses = append(response.ContainerResponses, resp)
+	}
+	return response, nil
+}
+
+// Allocate builds one response per container request for the pod currently
+// pending on this node. Devices come from GetNextDeviceRequest for that pod,
+// not from the IDs in the request, which are only validated and counted.
+func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
+	responses := pluginapi.AllocateResponse{}
+
+	nodeName := os.Getenv(util.NodeNameEnvName)
+	current, err := util.GetPendingPod(ctx, nodeName)
+	if err != nil {
+		return &responses, err
+	}
+
+	for _, req := range reqs.ContainerRequests {
+		if err := plugin.rm.ValidateRequest(req.DevicesIDs); err != nil {
+			return nil, fmt.Errorf("invalid allocation request for %q: %w", plugin.rm.Resource(), err)
+		}
+		currentCtr, devreq, err := GetNextDeviceRequest(nvidia.NvidiaGPUDevice, *current)
+		klog.Infoln("deviceAllocateFromAnnotation=", devreq)
+		if err != nil {
+			device.PodAllocationFailed(nodeName, current, NodeLockNvidia)
+			return &responses, err
+		}
+		if len(devreq) != len(req.DevicesIDs) {
+			device.PodAllocationFailed(nodeName, current, NodeLockNvidia)
+			return &responses, errors.New("device number not matched")
+		}
+		response, err := plugin.getAllocateResponse(plugin.GetContainerDeviceStrArray(devreq))
+		if err != nil {
+			// NOTE(review): unlike the neighbouring failure paths this one does not
+			// call device.PodAllocationFailed — confirm that is intended.
+			return nil, fmt.Errorf("failed to get allocate response: %v", err)
+		}
+
+		err = EraseNextDeviceTypeFromAnnotation(nvidia.NvidiaGPUDevice, *current)
+		if err != nil {
+			device.PodAllocationFailed(nodeName, current, NodeLockNvidia)
+			return &responses, err
+		}
+
+		if plugin.operatingMode != "mig" {
+			for i, dev := range devreq {
+				limitKey := fmt.Sprintf("CUDA_DEVICE_MEMORY_LIMIT_%v", i)
+				response.Envs[limitKey] = fmt.Sprintf("%vm", dev.Usedmem)
+			}
+			// devreq is empty when the request carries no device IDs; indexing it would panic.
+			if len(devreq) > 0 {
+				response.Envs["CUDA_DEVICE_SM_LIMIT"] = fmt.Sprint(devreq[0].Usedcores)
+			}
+			response.Envs["CUDA_DEVICE_MEMORY_SHARED_CACHE"] = fmt.Sprintf("%s/vgpu/%v.cache", hostHookPath, uuid.New().String())
+			if plugin.schedulerConfig.DeviceMemoryScaling > 1 {
+				response.Envs["CUDA_OVERSUBSCRIBE"] = "true"
+			}
+			if plugin.schedulerConfig.DisableCoreLimit {
+				response.Envs[util.CoreLimitSwitch] = "disable"
+			}
+			cacheFileHostDirectory := fmt.Sprintf("%s/vgpu/containers/%s_%s", hostHookPath, current.UID, currentCtr.Name)
+			// Directory setup stays best-effort, but failures are logged instead of dropped.
+			if err := os.RemoveAll(cacheFileHostDirectory); err != nil {
+				klog.Errorf("failed to clear %s: %v", cacheFileHostDirectory, err)
+			}
+			for _, dir := range []string{cacheFileHostDirectory, "/tmp/vgpulock"} {
+				if err := os.MkdirAll(dir, 0777); err != nil {
+					klog.Errorf("failed to create %s: %v", dir, err)
+				}
+				if err := os.Chmod(dir, 0777); err != nil {
+					klog.Errorf("failed to chmod %s: %v", dir, err)
+				}
+			}
+			response.Mounts = append(response.Mounts,
+				&pluginapi.Mount{ContainerPath: fmt.Sprintf("%s/vgpu/libvgpu.so", hostHookPath), HostPath: GetLibPath(), ReadOnly: true},
+				&pluginapi.Mount{ContainerPath: fmt.Sprintf("%s/vgpu", hostHookPath), HostPath: cacheFileHostDirectory, ReadOnly: false},
+				&pluginapi.Mount{ContainerPath: "/tmp/vgpulock", HostPath: "/tmp/vgpulock", ReadOnly: false},
+			)
+			found := false
+			for _, val := range currentCtr.Env {
+				if val.Name != "CUDA_DISABLE_CONTROL" {
+					continue
+				}
+				// Only a value that parses as true counts; false or unparsable
+				// entries are skipped and the scan continues.
+				if t, _ := strconv.ParseBool(val.Value); t {
+					found = true
+					break
+				}
+			}
+			if !found {
+				response.Mounts = append(response.Mounts, &pluginapi.Mount{ContainerPath: "/etc/ld.so.preload",
+					HostPath: hostHookPath + "/vgpu/ld.so.preload",
+					ReadOnly: true},
+				)
+			}
+			licensePath := fmt.Sprintf("%s/vgpu/license", hostHookPath)
+			if _, err := os.Stat(licensePath); err == nil {
+				response.Mounts = append(response.Mounts,
+					&pluginapi.Mount{ContainerPath: "/tmp/license", HostPath: licensePath, ReadOnly: true},
+					&pluginapi.Mount{ContainerPath: "/usr/bin/vgpuvalidator", HostPath: fmt.Sprintf("%s/vgpu/vgpuvalidator", hostHookPath), ReadOnly: true},
+				)
+			}
+		}
+		responses.ContainerResponses = append(responses.ContainerResponses, response)
+	}
+
+	return &responses, nil
+}
+
+// getAllocateResponse builds the container response for requestIds according to
+// the configured device-list strategies, MPS setting and plugin flags.
+func (plugin *NvidiaDevicePlugin) getAllocateResponse(requestIds []string) (*pluginapi.ContainerAllocateResponse, error) {
+	ids := plugin.deviceIDsFromAnnotatedDeviceIDs(requestIds)
+	strategies := plugin.deviceListStrategies
+
+	// Start from an empty response; each enabled feature fills in its part.
+	resp := &pluginapi.ContainerAllocateResponse{Envs: map[string]string{}}
+	if strategies.AnyCDIEnabled() {
+		if err := plugin.updateResponseForCDI(resp, uuid.New().String(), ids...); err != nil {
+			return nil, fmt.Errorf("failed to get allocate response for CDI: %v", err)
+		}
+	}
+	if plugin.mps.enabled {
+		plugin.updateResponseForMPS(resp)
+	}
+
+	// Everything below belongs to the non-CDI strategies, so stop here when
+	// only CDI strategies are selected.
+	if strategies.AllCDIEnabled() {
+		return resp, nil
+	}
+
+	if strategies.Includes(spec.DeviceListStrategyEnvVar) {
+		plugin.updateResponseForDeviceListEnvVar(resp, ids...)
+		plugin.updateResponseForImexChannelsEnvVar(resp)
+	}
+	if strategies.Includes(spec.DeviceListStrategyVolumeMounts) {
+		plugin.updateResponseForDeviceMounts(resp, ids...)
+	}
+	if *plugin.config.Flags.Plugin.PassDeviceSpecs {
+		resp.Devices = append(resp.Devices, plugin.apiDeviceSpecs(*plugin.config.Flags.NvidiaDevRoot, requestIds)...)
+	}
+	if *plugin.config.Flags.GDSEnabled {
+		resp.Envs["NVIDIA_GDS"] = "enabled"
+	}
+	if *plugin.config.Flags.MOFEDEnabled {
+		resp.Envs["NVIDIA_MOFED"] = "enabled"
+	}
+	return resp, nil
+}
+
+// updateResponseForMPS ensures that the ContainerAllocate response contains the information required to use MPS.
+// This includes per-resource pipe and log directories as well as a global daemon-specific shm
+// and assumes that an MPS control daemon has already been started.
+func (plugin *NvidiaDevicePlugin) updateResponseForMPS(response *pluginapi.ContainerAllocateResponse) {
+	plugin.mps.updateReponse(response) // pointer receiver: avoids copying the plugin struct on every call
+}
+
+// updateResponseForCDI adds the CDI references for deviceIDs, plus any IMEX
+// channels and the optional GDS/MOFED devices, to response. Depending on the
+// selected strategies they are written as annotations, CDIDevices entries, or both.
+func (plugin *NvidiaDevicePlugin) updateResponseForCDI(response *pluginapi.ContainerAllocateResponse, responseID string, deviceIDs ...string) error {
+	// Qualified names are collected in a fixed order: GPUs, IMEX channels, GDS, MOFED.
+	var qualified []string
+	for _, id := range deviceIDs {
+		qualified = append(qualified, plugin.cdiHandler.QualifiedName("gpu", id))
+	}
+	for _, ch := range plugin.imexChannels {
+		qualified = append(qualified, plugin.cdiHandler.QualifiedName("imex-channel", ch.ID))
+	}
+	if *plugin.config.Flags.GDSEnabled {
+		qualified = append(qualified, plugin.cdiHandler.QualifiedName("gds", "all"))
+	}
+	if *plugin.config.Flags.MOFEDEnabled {
+		qualified = append(qualified, plugin.cdiHandler.QualifiedName("mofed", "all"))
+	}
+
+	if len(qualified) == 0 {
+		return nil
+	}
+
+	if plugin.deviceListStrategies.Includes(spec.DeviceListStrategyCDIAnnotations) {
+		annotations, err := plugin.getCDIDeviceAnnotations(responseID, qualified...)
+		if err != nil {
+			return err
+		}
+		response.Annotations = annotations
+	}
+	if !plugin.deviceListStrategies.Includes(spec.DeviceListStrategyCDICRI) {
+		return nil
+	}
+	// The CRI strategy lists each qualified name as its own CDIDevice entry.
+	for _, name := range qualified {
+		response.CDIDevices = append(response.CDIDevices, &pluginapi.CDIDevice{Name: name})
+	}
+	return nil
+}
+
+// getCDIDeviceAnnotations builds CDI annotations for devices via cdiapi.UpdateAnnotations
+// and swaps the default key prefix for the configured one when they differ.
+func (plugin *NvidiaDevicePlugin) getCDIDeviceAnnotations(id string, devices ...string) (map[string]string, error) {
+	generated, err := cdiapi.UpdateAnnotations(map[string]string{}, "nvidia-device-plugin", id, devices)
+	if err != nil {
+		return nil, fmt.Errorf("failed to add CDI annotations: %v", err)
+	}
+
+	customPrefix := plugin.cdiAnnotationPrefix
+	if customPrefix == spec.DefaultCDIAnnotationPrefix {
+		return generated, nil
+	}
+
+	rekeyed := make(map[string]string, len(generated))
+	for key, value := range generated {
+		rekeyed[customPrefix+strings.TrimPrefix(key, spec.DefaultCDIAnnotationPrefix)] = value
+	}
+	return rekeyed, nil
+}
+
+// PreStartContainer does no work for this plugin; it always returns an empty response and a nil error.
+func (plugin *NvidiaDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) {
+	return &pluginapi.PreStartContainerResponse{}, nil
+}
+
+// dial opens a blocking, insecure gRPC connection to the unix socket at unixSocketPath, bounded by timeout.
+func (plugin *NvidiaDevicePlugin) dial(unixSocketPath string, timeout time.Duration) (*grpc.ClientConn, error) {
+	dialCtx, cancel := context.WithTimeout(context.Background(), timeout)
+	defer cancel()
+	unixDialer := func(ctx context.Context, addr string) (net.Conn, error) {
+		return (&net.Dialer{}).DialContext(ctx, "unix", addr)
+	}
+	//nolint:staticcheck  // TODO: Switch to grpc.NewClient
+	conn, err := grpc.DialContext(dialCtx, unixSocketPath,
+		grpc.WithTransportCredentials(insecure.NewCredentials()),
+		//nolint:staticcheck  // TODO: WithBlock is deprecated.
+		grpc.WithBlock(),
+		grpc.WithContextDialer(unixDialer),
+	)
+	if err != nil {
+		return nil, err
+	}
+	return conn, nil
+}
+
+func (plugin *NvidiaDevicePlugin) deviceIDsFromAnnotatedDeviceIDs(ids []string) []string {
+	var deviceIDs []string
+	if *plugin.config.Flags.Plugin.DeviceIDStrategy == spec.DeviceIDStrategyUUID {
+		deviceIDs = rm.AnnotatedIDs(ids).GetIDs()
+	}
+	if *plugin.config.Flags.Plugin.DeviceIDStrategy == spec.DeviceIDStrategyIndex {
+		deviceIDs = plugin.rm.Devices().Subset(ids).GetIndices()
+	}
+	return deviceIDs
+}
+
+func (plugin *NvidiaDevicePlugin) apiDevices() []*pluginapi.Device {
+	return plugin.rm.Devices().GetPluginDevices() // the resource manager's devices in pluginapi form
+}
+
+// updateResponseForDeviceListEnvVar stores deviceIDs, comma-joined, in the response under deviceListEnvVar.
+func (plugin *NvidiaDevicePlugin) updateResponseForDeviceListEnvVar(response *pluginapi.ContainerAllocateResponse, deviceIDs ...string) {
+	response.Envs[deviceListEnvVar] = strings.Join(deviceIDs, ",")
+}
+
+// updateResponseForImexChannelsEnvVar exports the plugin's IMEX channel IDs, comma-joined, when any exist.
+func (plugin *NvidiaDevicePlugin) updateResponseForImexChannelsEnvVar(response *pluginapi.ContainerAllocateResponse) {
+	var ids []string
+	for i := range plugin.imexChannels {
+		ids = append(ids, plugin.imexChannels[i].ID)
+	}
+	if len(ids) > 0 {
+		response.Envs[spec.ImexChannelEnvVar] = strings.Join(ids, ",")
+	}
+}
+
+// updateResponseForDeviceMounts points the device-list env var at the mount root
+// and adds one mount per device ID and per IMEX channel beneath that root.
+func (plugin *NvidiaDevicePlugin) updateResponseForDeviceMounts(response *pluginapi.ContainerAllocateResponse, deviceIDs ...string) {
+	plugin.updateResponseForDeviceListEnvVar(response, deviceListAsVolumeMountsContainerPathRoot)
+
+	// Every mount shares the same host path; only the container path differs.
+	appendMount := func(containerPath string) {
+		response.Mounts = append(response.Mounts, &pluginapi.Mount{
+			HostPath:      deviceListAsVolumeMountsHostPath,
+			ContainerPath: containerPath,
+		})
+	}
+	for _, id := range deviceIDs {
+		appendMount(filepath.Join(deviceListAsVolumeMountsContainerPathRoot, id))
+	}
+	for _, channel := range plugin.imexChannels {
+		appendMount(filepath.Join(deviceListAsVolumeMountsContainerPathRoot, "imex", channel.ID))
+	}
+}
+
+// apiDeviceSpecs turns the device node paths for ids, followed by the paths of
+// all IMEX channels, into "rw" DeviceSpecs whose host paths are joined onto
+// devRoot.
+func (plugin *NvidiaDevicePlugin) apiDeviceSpecs(devRoot string, ids []string) []*pluginapi.DeviceSpec {
+	// Nodes that are left out when os.Stat on their path fails.
+	optionalNodes := map[string]bool{
+		"/dev/nvidiactl":        true,
+		"/dev/nvidia-uvm":       true,
+		"/dev/nvidia-uvm-tools": true,
+		"/dev/nvidia-modeset":   true,
+	}
+
+	var deviceSpecs []*pluginapi.DeviceSpec
+	for _, nodePath := range plugin.rm.GetDevicePaths(ids) {
+		if optionalNodes[nodePath] {
+			if _, statErr := os.Stat(nodePath); statErr != nil {
+				continue
+			}
+		}
+		deviceSpecs = append(deviceSpecs, &pluginapi.DeviceSpec{
+			ContainerPath: nodePath,
+			HostPath:      filepath.Join(devRoot, nodePath),
+			Permissions:   "rw",
+		})
+	}
+
+	for _, channel := range plugin.imexChannels {
+		deviceSpecs = append(deviceSpecs, &pluginapi.DeviceSpec{
+			ContainerPath: channel.Path,
+			// TODO: The HostPath property for a channel is not the correct value to use here.
+			// The `devRoot` there represents the devRoot in the current container when discovering devices
+			// and is set to "{{ .*config.Flags.Plugin.ContainerDriverRoot }}/dev".
+			// The devRoot in this context is the {{ .config.Flags.NvidiaDevRoot }} and defines the
+			// root for device nodes on the host. This is usually / or /run/nvidia/driver when the
+			// driver container is used.
+			HostPath:    filepath.Join(devRoot, channel.Path),
+			Permissions: "rw",
+		})
+	}
+
+	return deviceSpecs
+}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server_test.go b/pkg/nvidia-plugin/pkg/plugin/server_test.go
similarity index 58%
rename from pkg/device-plugin/nvidiadevice/nvinternal/plugin/server_test.go
rename to pkg/nvidia-plugin/pkg/plugin/server_test.go
index 1574b3ff7..cce940138 100644
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server_test.go
+++ b/pkg/nvidia-plugin/pkg/plugin/server_test.go
@@ -1,46 +1,31 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
 
 package plugin
 
 import (
-	"fmt"
 	"testing"
 
-	v1 "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/cdi"
-	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
 	"github.com/stretchr/testify/require"
-	kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+
+	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+	v1 "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/cdi"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/imex"
 )
 
 func TestCDIAllocateResponse(t *testing.T) {
@@ -49,31 +34,22 @@ func TestCDIAllocateResponse(t *testing.T) {
 		deviceIds            []string
 		deviceListStrategies []string
 		CDIPrefix            string
-		CDIEnabled           bool
 		GDSEnabled           bool
 		MOFEDEnabled         bool
-		expectedResponse     kubeletdevicepluginv1beta1.ContainerAllocateResponse
+		imexChannels         []*imex.Channel
+		expectedResponse     pluginapi.ContainerAllocateResponse
 	}{
 		{
 			description:          "empty device list has empty response",
 			deviceListStrategies: []string{"cdi-annotations"},
 			CDIPrefix:            "cdi.k8s.io/",
-			CDIEnabled:           true,
-		},
-		{
-			description:          "CDI disabled has empty response",
-			deviceIds:            []string{"gpu0"},
-			deviceListStrategies: []string{"cdi-annotations"},
-			CDIPrefix:            "cdi.k8s.io/",
-			CDIEnabled:           false,
 		},
 		{
 			description:          "single device is added to annotations",
 			deviceIds:            []string{"gpu0"},
 			deviceListStrategies: []string{"cdi-annotations"},
 			CDIPrefix:            "cdi.k8s.io/",
-			CDIEnabled:           true,
-			expectedResponse: kubeletdevicepluginv1beta1.ContainerAllocateResponse{
+			expectedResponse: pluginapi.ContainerAllocateResponse{
 				Annotations: map[string]string{
 					"cdi.k8s.io/nvidia-device-plugin_uuid": "nvidia.com/gpu=gpu0",
 				},
@@ -84,8 +60,7 @@ func TestCDIAllocateResponse(t *testing.T) {
 			deviceIds:            []string{"gpu0"},
 			deviceListStrategies: []string{"cdi-annotations"},
 			CDIPrefix:            "custom.cdi.k8s.io/",
-			CDIEnabled:           true,
-			expectedResponse: kubeletdevicepluginv1beta1.ContainerAllocateResponse{
+			expectedResponse: pluginapi.ContainerAllocateResponse{
 				Annotations: map[string]string{
 					"custom.cdi.k8s.io/nvidia-device-plugin_uuid": "nvidia.com/gpu=gpu0",
 				},
@@ -96,8 +71,7 @@ func TestCDIAllocateResponse(t *testing.T) {
 			deviceIds:            []string{"gpu0", "gpu1"},
 			deviceListStrategies: []string{"cdi-annotations"},
 			CDIPrefix:            "cdi.k8s.io/",
-			CDIEnabled:           true,
-			expectedResponse: kubeletdevicepluginv1beta1.ContainerAllocateResponse{
+			expectedResponse: pluginapi.ContainerAllocateResponse{
 				Annotations: map[string]string{
 					"cdi.k8s.io/nvidia-device-plugin_uuid": "nvidia.com/gpu=gpu0,nvidia.com/gpu=gpu1",
 				},
@@ -108,8 +82,7 @@ func TestCDIAllocateResponse(t *testing.T) {
 			deviceIds:            []string{"gpu0", "gpu1"},
 			deviceListStrategies: []string{"cdi-annotations"},
 			CDIPrefix:            "custom.cdi.k8s.io/",
-			CDIEnabled:           true,
-			expectedResponse: kubeletdevicepluginv1beta1.ContainerAllocateResponse{
+			expectedResponse: pluginapi.ContainerAllocateResponse{
 				Annotations: map[string]string{
 					"custom.cdi.k8s.io/nvidia-device-plugin_uuid": "nvidia.com/gpu=gpu0,nvidia.com/gpu=gpu1",
 				},
@@ -119,9 +92,8 @@ func TestCDIAllocateResponse(t *testing.T) {
 			description:          "mofed devices are selected if configured",
 			deviceListStrategies: []string{"cdi-annotations"},
 			CDIPrefix:            "cdi.k8s.io/",
-			CDIEnabled:           true,
 			MOFEDEnabled:         true,
-			expectedResponse: kubeletdevicepluginv1beta1.ContainerAllocateResponse{
+			expectedResponse: pluginapi.ContainerAllocateResponse{
 				Annotations: map[string]string{
 					"cdi.k8s.io/nvidia-device-plugin_uuid": "nvidia.com/mofed=all",
 				},
@@ -131,9 +103,8 @@ func TestCDIAllocateResponse(t *testing.T) {
 			description:          "gds devices are selected if configured",
 			deviceListStrategies: []string{"cdi-annotations"},
 			CDIPrefix:            "cdi.k8s.io/",
-			CDIEnabled:           true,
 			GDSEnabled:           true,
-			expectedResponse: kubeletdevicepluginv1beta1.ContainerAllocateResponse{
+			expectedResponse: pluginapi.ContainerAllocateResponse{
 				Annotations: map[string]string{
 					"cdi.k8s.io/nvidia-device-plugin_uuid": "nvidia.com/gds=all",
 				},
@@ -144,18 +115,29 @@ func TestCDIAllocateResponse(t *testing.T) {
 			deviceIds:            []string{"gpu0"},
 			deviceListStrategies: []string{"cdi-annotations"},
 			CDIPrefix:            "cdi.k8s.io/",
-			CDIEnabled:           true,
 			GDSEnabled:           true,
 			MOFEDEnabled:         true,
-			expectedResponse: kubeletdevicepluginv1beta1.ContainerAllocateResponse{
+			expectedResponse: pluginapi.ContainerAllocateResponse{
 				Annotations: map[string]string{
 					"cdi.k8s.io/nvidia-device-plugin_uuid": "nvidia.com/gpu=gpu0,nvidia.com/gds=all,nvidia.com/mofed=all",
 				},
 			},
 		},
+		{
+			description:          "imex channel is included with devices",
+			deviceListStrategies: []string{"cdi-annotations"},
+			CDIPrefix:            "cdi.k8s.io/",
+			imexChannels:         []*imex.Channel{{ID: "0"}},
+			expectedResponse: pluginapi.ContainerAllocateResponse{
+				Annotations: map[string]string{
+					"cdi.k8s.io/nvidia-device-plugin_uuid": "nvidia.com/imex-channel=0",
+				},
+			},
+		},
 	}
 
-	for _, tc := range testCases {
+	for i := range testCases {
+		tc := testCases[i]
 		t.Run(tc.description, func(t *testing.T) {
 			deviceListStrategies, _ := v1.NewDeviceListStrategies(tc.deviceListStrategies)
 			plugin := NvidiaDevicePlugin{
@@ -174,27 +156,16 @@ func TestCDIAllocateResponse(t *testing.T) {
 						return "nvidia.com/" + c + "=" + s
 					},
 				},
-				cdiEnabled:           tc.CDIEnabled,
 				deviceListStrategies: deviceListStrategies,
 				cdiAnnotationPrefix:  tc.CDIPrefix,
+				imexChannels:         tc.imexChannels,
 			}
 
-			response, err := plugin.getAllocateResponseForCDI("uuid", tc.deviceIds)
+			response := pluginapi.ContainerAllocateResponse{}
+			err := plugin.updateResponseForCDI(&response, "uuid", tc.deviceIds...)
 
 			require.Nil(t, err)
 			require.EqualValues(t, &tc.expectedResponse, &response)
 		})
 	}
 }
-
-func Test_pathGeneration(t *testing.T) {
-	hostHookPath := "/usr/local/vgpu"
-	uid := "testuid"
-	cname := "testcname"
-	expected := "/usr/local/vgpu/containers/testuid_testcname"
-	result := fmt.Sprintf("%s/containers/%s_%s", hostHookPath, uid, cname)
-
-	if expected != result {
-		t.Errorf("Expected %s, got %s", expected, result)
-	}
-}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util.go b/pkg/nvidia-plugin/pkg/plugin/util.go
similarity index 99%
rename from pkg/device-plugin/nvidiadevice/nvinternal/plugin/util.go
rename to pkg/nvidia-plugin/pkg/plugin/util.go
index d0bd35379..2b16900b3 100644
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util.go
+++ b/pkg/nvidia-plugin/pkg/plugin/util.go
@@ -29,7 +29,7 @@ import (
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/klog/v2"
 
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/info"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/info"
 	"github.com/Project-HAMi/HAMi/pkg/util"
 )
 
diff --git a/pkg/nvidia-plugin/pkg/resource/cuda-device.go b/pkg/nvidia-plugin/pkg/resource/cuda-device.go
new file mode 100644
index 000000000..e8c7f28c0
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/cuda-device.go
@@ -0,0 +1,110 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package resource
+
+import (
+	"fmt"
+
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/cuda"
+)
+
+type cudaDevice cuda.Device // cudaDevice gives cuda.Device the methods of the Device interface
+
+var _ Device = (*cudaDevice)(nil) // compile-time check that *cudaDevice implements Device
+
+// NewCudaDevice wraps d so that it can be used through the Device interface.
+func NewCudaDevice(d cuda.Device) Device {
+	wrapped := cudaDevice(d)
+	return &wrapped
+}
+
+// GetAttributes always returns an error: attributes are not supported for CUDA devices.
+func (d *cudaDevice) GetAttributes() (map[string]interface{}, error) {
+	return nil, fmt.Errorf("GetAttributes is not supported for CUDA devices")
+}
+
+// GetCudaComputeCapability returns the CUDA Compute Capability major and minor versions.
+// If the device is a MIG device (i.e. a compute instance) these are 0
+func (d *cudaDevice) GetCudaComputeCapability() (int, int, error) {
+	dev := cuda.Device(*d)
+
+	major, res := dev.GetAttribute(cuda.COMPUTE_CAPABILITY_MAJOR)
+	if res != cuda.SUCCESS {
+		return 0, 0, fmt.Errorf("failed to get CUDA compute capability major for device: result=%v", res)
+	}
+	minor, res := dev.GetAttribute(cuda.COMPUTE_CAPABILITY_MINOR)
+	if res != cuda.SUCCESS {
+		return 0, 0, fmt.Errorf("failed to get CUDA compute capability minor for device: result=%v", res)
+	}
+	return major, minor, nil
+}
+
+// GetDeviceHandleFromMigDeviceHandle always returns an error: it is unsupported for CUDA devices.
+func (d *cudaDevice) GetDeviceHandleFromMigDeviceHandle() (Device, error) {
+	return nil, fmt.Errorf("GetDeviceHandleFromMigDeviceHandle is unsupported for CUDA devices")
+}
+
+// GetTotalMemoryMB returns the value of TotalMem for the device divided by 1024*1024.
+func (d *cudaDevice) GetTotalMemoryMB() (uint64, error) {
+	rawTotal, res := cuda.Device(*d).TotalMem()
+	if res != cuda.SUCCESS {
+		return 0, fmt.Errorf("failed to get memory info for device: %v", res)
+	}
+	return rawTotal / (1 << 20), nil
+}
+
+// GetMigDevices always returns an error: MIG device enumeration is unsupported for CUDA devices.
+func (d *cudaDevice) GetMigDevices() ([]Device, error) {
+	return nil, fmt.Errorf("GetMigDevices is unsupported for CUDA devices")
+}
+
+// GetName returns the device name / model as reported by cuda.Device.GetName,
+// or an error when that call does not return cuda.SUCCESS.
+func (d *cudaDevice) GetName() (string, error) {
+	deviceName, res := cuda.Device(*d).GetName()
+	if res == cuda.SUCCESS {
+		return deviceName, nil
+	}
+	return "", fmt.Errorf("failed to get device name: %v", res)
+}
+
+// GetUUID always returns an empty string and an error: UUIDs are unsupported for CUDA devices.
+func (d *cudaDevice) GetUUID() (string, error) {
+	return "", fmt.Errorf("GetUUID is unsupported for CUDA devices")
+}
+
+// IsMigCapable always reports false with a nil error for CUDA devices.
+func (d *cudaDevice) IsMigCapable() (bool, error) {
+	return false, nil
+}
+
+// IsMigEnabled always reports false with a nil error for CUDA devices.
+func (d *cudaDevice) IsMigEnabled() (bool, error) {
+	return false, nil
+}
+
+func (d *cudaDevice) GetPCIClass() (uint32, error) {
+	return 0, nil // no PCI class is looked up for CUDA devices; 0 is always returned
+}
+
+func (d *cudaDevice) IsFabricAttached() (bool, error) {
+	return false, nil // CUDA devices are always reported as not fabric-attached
+}
+
+func (d *cudaDevice) GetFabricIDs() (string, string, error) {
+	return "", "", fmt.Errorf("GetFabricIDs is not supported for CUDA devices") // always an error, never IDs
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/cuda-lib.go b/pkg/nvidia-plugin/pkg/resource/cuda-lib.go
new file mode 100644
index 000000000..0dc045a8d
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/cuda-lib.go
@@ -0,0 +1,88 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package resource
+
+import (
+	"fmt"
+
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/cuda"
+)
+
+type cudaLib struct{} // stateless Manager implementation backed by the cuda package
+
+var _ Manager = (*cudaLib)(nil) // compile-time check that *cudaLib satisfies Manager
+
+// NewCudaManager returns a resource manager for CUDA devices.
+func NewCudaManager() Manager {
+	return &cudaLib{}
+}
+
+// GetDevices looks up every CUDA device by index (0..count-1) and wraps each one with NewCudaDevice.
+func (l *cudaLib) GetDevices() ([]Device, error) {
+	n, res := cuda.DeviceGetCount()
+	if res != cuda.SUCCESS {
+		return nil, fmt.Errorf("failed to get number of CUDA devices: %v", res)
+	}
+
+	var found []Device
+	for idx := 0; idx < n; idx++ {
+		handle, res := cuda.DeviceGet(idx)
+		if res != cuda.SUCCESS {
+			return nil, fmt.Errorf("failed to get CUDA device %v: %v", idx, res)
+		}
+		found = append(found, NewCudaDevice(handle))
+	}
+
+	return found, nil
+}
+
+// GetCudaDriverVersion returns the CUDA driver version as (major, minor), or an error if the query fails.
+func (l *cudaLib) GetCudaDriverVersion() (int, int, error) {
+	version, r := cuda.DriverGetVersion()
+	if r != cuda.SUCCESS {
+		return 0, 0, fmt.Errorf("failed to get driver version: %v", r)
+	}
+
+	major := version / 1000
+	minor := version % 1000 / 10 // version is 1000*major + 10*minor; "% 100" would truncate minors >= 10
+
+	return major, minor, nil
+}
+
+// GetDriverVersion returns the driver version string.
+// The CUDA manager does not query it and always returns the placeholder "unknown.unknown.unknown".
+func (l *cudaLib) GetDriverVersion() (string, error) {
+	return "unknown.unknown.unknown", nil
+}
+
+// Init calls cuda.Init and converts any result other than cuda.SUCCESS
+// into an error carrying that result.
+func (l *cudaLib) Init() error {
+	if res := cuda.Init(); res != cuda.SUCCESS {
+		return fmt.Errorf("%v", res)
+	}
+	return nil
+}
+
+// Shutdown calls cuda.Shutdown and converts any result other than cuda.SUCCESS
+// into an error carrying that result.
+func (l *cudaLib) Shutdown() (err error) {
+	if res := cuda.Shutdown(); res != cuda.SUCCESS {
+		return fmt.Errorf("%v", res)
+	}
+	return nil
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/device_mock.go b/pkg/nvidia-plugin/pkg/resource/device_mock.go
new file mode 100644
index 000000000..eadc8932b
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/device_mock.go
@@ -0,0 +1,437 @@
+// Code generated by moq; DO NOT EDIT.
+// github.com/matryer/moq
+
+package resource
+
+import (
+	"sync"
+)
+
+// Ensure, that DeviceMock does implement Device.
+// If this is not the case, regenerate this file with moq.
+var _ Device = &DeviceMock{}
+
+// DeviceMock is a mock implementation of Device.
+//
+//	func TestSomethingThatUsesDevice(t *testing.T) {
+//
+//		// make and configure a mocked Device
+//		mockedDevice := &DeviceMock{
+//			GetAttributesFunc: func() (map[string]interface{}, error) {
+//				panic("mock out the GetAttributes method")
+//			},
+//			GetCudaComputeCapabilityFunc: func() (int, int, error) {
+//				panic("mock out the GetCudaComputeCapability method")
+//			},
+//			GetDeviceHandleFromMigDeviceHandleFunc: func() (Device, error) {
+//				panic("mock out the GetDeviceHandleFromMigDeviceHandle method")
+//			},
+//			GetFabricIDsFunc: func() (string, string, error) {
+//				panic("mock out the GetFabricIDs method")
+//			},
+//			GetMigDevicesFunc: func() ([]Device, error) {
+//				panic("mock out the GetMigDevices method")
+//			},
+//			GetNameFunc: func() (string, error) {
+//				panic("mock out the GetName method")
+//			},
+//			GetPCIClassFunc: func() (uint32, error) {
+//				panic("mock out the GetPCIClass method")
+//			},
+//			GetTotalMemoryMBFunc: func() (uint64, error) {
+//				panic("mock out the GetTotalMemoryMB method")
+//			},
+//			IsFabricAttachedFunc: func() (bool, error) {
+//				panic("mock out the IsFabricAttached method")
+//			},
+//			IsMigCapableFunc: func() (bool, error) {
+//				panic("mock out the IsMigCapable method")
+//			},
+//			IsMigEnabledFunc: func() (bool, error) {
+//				panic("mock out the IsMigEnabled method")
+//			},
+//		}
+//
+//		// use mockedDevice in code that requires Device
+//		// and then make assertions.
+//
+//	}
+type DeviceMock struct {
+	// GetAttributesFunc mocks the GetAttributes method.
+	GetAttributesFunc func() (map[string]interface{}, error)
+
+	// GetCudaComputeCapabilityFunc mocks the GetCudaComputeCapability method.
+	GetCudaComputeCapabilityFunc func() (int, int, error)
+
+	// GetDeviceHandleFromMigDeviceHandleFunc mocks the GetDeviceHandleFromMigDeviceHandle method.
+	GetDeviceHandleFromMigDeviceHandleFunc func() (Device, error)
+
+	// GetFabricIDsFunc mocks the GetFabricIDs method.
+	GetFabricIDsFunc func() (string, string, error)
+
+	// GetMigDevicesFunc mocks the GetMigDevices method.
+	GetMigDevicesFunc func() ([]Device, error)
+
+	// GetNameFunc mocks the GetName method.
+	GetNameFunc func() (string, error)
+
+	// GetPCIClassFunc mocks the GetPCIClass method.
+	GetPCIClassFunc func() (uint32, error)
+
+	// GetTotalMemoryMBFunc mocks the GetTotalMemoryMB method.
+	GetTotalMemoryMBFunc func() (uint64, error)
+
+	// IsFabricAttachedFunc mocks the IsFabricAttached method.
+	IsFabricAttachedFunc func() (bool, error)
+
+	// IsMigCapableFunc mocks the IsMigCapable method.
+	IsMigCapableFunc func() (bool, error)
+
+	// IsMigEnabledFunc mocks the IsMigEnabled method.
+	IsMigEnabledFunc func() (bool, error)
+
+	// calls tracks calls to the methods.
+	calls struct {
+		// GetAttributes holds details about calls to the GetAttributes method.
+		GetAttributes []struct {
+		}
+		// GetCudaComputeCapability holds details about calls to the GetCudaComputeCapability method.
+		GetCudaComputeCapability []struct {
+		}
+		// GetDeviceHandleFromMigDeviceHandle holds details about calls to the GetDeviceHandleFromMigDeviceHandle method.
+		GetDeviceHandleFromMigDeviceHandle []struct {
+		}
+		// GetFabricIDs holds details about calls to the GetFabricIDs method.
+		GetFabricIDs []struct {
+		}
+		// GetMigDevices holds details about calls to the GetMigDevices method.
+		GetMigDevices []struct {
+		}
+		// GetName holds details about calls to the GetName method.
+		GetName []struct {
+		}
+		// GetPCIClass holds details about calls to the GetPCIClass method.
+		GetPCIClass []struct {
+		}
+		// GetTotalMemoryMB holds details about calls to the GetTotalMemoryMB method.
+		GetTotalMemoryMB []struct {
+		}
+		// IsFabricAttached holds details about calls to the IsFabricAttached method.
+		IsFabricAttached []struct {
+		}
+		// IsMigCapable holds details about calls to the IsMigCapable method.
+		IsMigCapable []struct {
+		}
+		// IsMigEnabled holds details about calls to the IsMigEnabled method.
+		IsMigEnabled []struct {
+		}
+	}
+	lockGetAttributes                      sync.RWMutex
+	lockGetCudaComputeCapability           sync.RWMutex
+	lockGetDeviceHandleFromMigDeviceHandle sync.RWMutex
+	lockGetFabricIDs                       sync.RWMutex
+	lockGetMigDevices                      sync.RWMutex
+	lockGetName                            sync.RWMutex
+	lockGetPCIClass                        sync.RWMutex
+	lockGetTotalMemoryMB                   sync.RWMutex
+	lockIsFabricAttached                   sync.RWMutex
+	lockIsMigCapable                       sync.RWMutex
+	lockIsMigEnabled                       sync.RWMutex
+}
+
+// GetAttributes calls GetAttributesFunc.
+func (mock *DeviceMock) GetAttributes() (map[string]interface{}, error) {
+	if mock.GetAttributesFunc == nil {
+		panic("DeviceMock.GetAttributesFunc: method is nil but Device.GetAttributes was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockGetAttributes.Lock()
+	mock.calls.GetAttributes = append(mock.calls.GetAttributes, callInfo)
+	mock.lockGetAttributes.Unlock()
+	return mock.GetAttributesFunc()
+}
+
+// GetAttributesCalls gets all the calls that were made to GetAttributes.
+// Check the length with:
+//
+//	len(mockedDevice.GetAttributesCalls())
+func (mock *DeviceMock) GetAttributesCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockGetAttributes.RLock()
+	calls = mock.calls.GetAttributes
+	mock.lockGetAttributes.RUnlock()
+	return calls
+}
+
+// GetCudaComputeCapability calls GetCudaComputeCapabilityFunc.
+func (mock *DeviceMock) GetCudaComputeCapability() (int, int, error) {
+	if mock.GetCudaComputeCapabilityFunc == nil {
+		panic("DeviceMock.GetCudaComputeCapabilityFunc: method is nil but Device.GetCudaComputeCapability was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockGetCudaComputeCapability.Lock()
+	mock.calls.GetCudaComputeCapability = append(mock.calls.GetCudaComputeCapability, callInfo)
+	mock.lockGetCudaComputeCapability.Unlock()
+	return mock.GetCudaComputeCapabilityFunc()
+}
+
+// GetCudaComputeCapabilityCalls gets all the calls that were made to GetCudaComputeCapability.
+// Check the length with:
+//
+//	len(mockedDevice.GetCudaComputeCapabilityCalls())
+func (mock *DeviceMock) GetCudaComputeCapabilityCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockGetCudaComputeCapability.RLock()
+	calls = mock.calls.GetCudaComputeCapability
+	mock.lockGetCudaComputeCapability.RUnlock()
+	return calls
+}
+
+// GetDeviceHandleFromMigDeviceHandle calls GetDeviceHandleFromMigDeviceHandleFunc.
+func (mock *DeviceMock) GetDeviceHandleFromMigDeviceHandle() (Device, error) {
+	if mock.GetDeviceHandleFromMigDeviceHandleFunc == nil {
+		panic("DeviceMock.GetDeviceHandleFromMigDeviceHandleFunc: method is nil but Device.GetDeviceHandleFromMigDeviceHandle was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockGetDeviceHandleFromMigDeviceHandle.Lock()
+	mock.calls.GetDeviceHandleFromMigDeviceHandle = append(mock.calls.GetDeviceHandleFromMigDeviceHandle, callInfo)
+	mock.lockGetDeviceHandleFromMigDeviceHandle.Unlock()
+	return mock.GetDeviceHandleFromMigDeviceHandleFunc()
+}
+
+// GetDeviceHandleFromMigDeviceHandleCalls gets all the calls that were made to GetDeviceHandleFromMigDeviceHandle.
+// Check the length with:
+//
+//	len(mockedDevice.GetDeviceHandleFromMigDeviceHandleCalls())
+func (mock *DeviceMock) GetDeviceHandleFromMigDeviceHandleCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockGetDeviceHandleFromMigDeviceHandle.RLock()
+	calls = mock.calls.GetDeviceHandleFromMigDeviceHandle
+	mock.lockGetDeviceHandleFromMigDeviceHandle.RUnlock()
+	return calls
+}
+
+// GetFabricIDs calls GetFabricIDsFunc.
+func (mock *DeviceMock) GetFabricIDs() (string, string, error) {
+	if mock.GetFabricIDsFunc == nil {
+		panic("DeviceMock.GetFabricIDsFunc: method is nil but Device.GetFabricIDs was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockGetFabricIDs.Lock()
+	mock.calls.GetFabricIDs = append(mock.calls.GetFabricIDs, callInfo)
+	mock.lockGetFabricIDs.Unlock()
+	return mock.GetFabricIDsFunc()
+}
+
+// GetFabricIDsCalls gets all the calls that were made to GetFabricIDs.
+// Check the length with:
+//
+//	len(mockedDevice.GetFabricIDsCalls())
+func (mock *DeviceMock) GetFabricIDsCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockGetFabricIDs.RLock()
+	calls = mock.calls.GetFabricIDs
+	mock.lockGetFabricIDs.RUnlock()
+	return calls
+}
+
+// GetMigDevices calls GetMigDevicesFunc.
+func (mock *DeviceMock) GetMigDevices() ([]Device, error) {
+	if mock.GetMigDevicesFunc == nil {
+		panic("DeviceMock.GetMigDevicesFunc: method is nil but Device.GetMigDevices was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockGetMigDevices.Lock()
+	mock.calls.GetMigDevices = append(mock.calls.GetMigDevices, callInfo)
+	mock.lockGetMigDevices.Unlock()
+	return mock.GetMigDevicesFunc()
+}
+
+// GetMigDevicesCalls gets all the calls that were made to GetMigDevices.
+// Check the length with:
+//
+//	len(mockedDevice.GetMigDevicesCalls())
+func (mock *DeviceMock) GetMigDevicesCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockGetMigDevices.RLock()
+	calls = mock.calls.GetMigDevices
+	mock.lockGetMigDevices.RUnlock()
+	return calls
+}
+
+// GetName calls GetNameFunc.
+func (mock *DeviceMock) GetName() (string, error) {
+	if mock.GetNameFunc == nil {
+		panic("DeviceMock.GetNameFunc: method is nil but Device.GetName was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockGetName.Lock()
+	mock.calls.GetName = append(mock.calls.GetName, callInfo)
+	mock.lockGetName.Unlock()
+	return mock.GetNameFunc()
+}
+
+// GetNameCalls gets all the calls that were made to GetName.
+// Check the length with:
+//
+//	len(mockedDevice.GetNameCalls())
+func (mock *DeviceMock) GetNameCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockGetName.RLock()
+	calls = mock.calls.GetName
+	mock.lockGetName.RUnlock()
+	return calls
+}
+
+// GetPCIClass calls GetPCIClassFunc.
+func (mock *DeviceMock) GetPCIClass() (uint32, error) {
+	if mock.GetPCIClassFunc == nil {
+		panic("DeviceMock.GetPCIClassFunc: method is nil but Device.GetPCIClass was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockGetPCIClass.Lock()
+	mock.calls.GetPCIClass = append(mock.calls.GetPCIClass, callInfo)
+	mock.lockGetPCIClass.Unlock()
+	return mock.GetPCIClassFunc()
+}
+
+// GetPCIClassCalls gets all the calls that were made to GetPCIClass.
+// Check the length with:
+//
+//	len(mockedDevice.GetPCIClassCalls())
+func (mock *DeviceMock) GetPCIClassCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockGetPCIClass.RLock()
+	calls = mock.calls.GetPCIClass
+	mock.lockGetPCIClass.RUnlock()
+	return calls
+}
+
+// GetTotalMemoryMB calls GetTotalMemoryMBFunc.
+func (mock *DeviceMock) GetTotalMemoryMB() (uint64, error) {
+	if mock.GetTotalMemoryMBFunc == nil {
+		panic("DeviceMock.GetTotalMemoryMBFunc: method is nil but Device.GetTotalMemoryMB was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockGetTotalMemoryMB.Lock()
+	mock.calls.GetTotalMemoryMB = append(mock.calls.GetTotalMemoryMB, callInfo)
+	mock.lockGetTotalMemoryMB.Unlock()
+	return mock.GetTotalMemoryMBFunc()
+}
+
+// GetTotalMemoryMBCalls gets all the calls that were made to GetTotalMemoryMB.
+// Check the length with:
+//
+//	len(mockedDevice.GetTotalMemoryMBCalls())
+func (mock *DeviceMock) GetTotalMemoryMBCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockGetTotalMemoryMB.RLock()
+	calls = mock.calls.GetTotalMemoryMB
+	mock.lockGetTotalMemoryMB.RUnlock()
+	return calls
+}
+
+// IsFabricAttached calls IsFabricAttachedFunc.
+func (mock *DeviceMock) IsFabricAttached() (bool, error) {
+	if mock.IsFabricAttachedFunc == nil {
+		panic("DeviceMock.IsFabricAttachedFunc: method is nil but Device.IsFabricAttached was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockIsFabricAttached.Lock()
+	mock.calls.IsFabricAttached = append(mock.calls.IsFabricAttached, callInfo)
+	mock.lockIsFabricAttached.Unlock()
+	return mock.IsFabricAttachedFunc()
+}
+
+// IsFabricAttachedCalls gets all the calls that were made to IsFabricAttached.
+// Check the length with:
+//
+//	len(mockedDevice.IsFabricAttachedCalls())
+func (mock *DeviceMock) IsFabricAttachedCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockIsFabricAttached.RLock()
+	calls = mock.calls.IsFabricAttached
+	mock.lockIsFabricAttached.RUnlock()
+	return calls
+}
+
+// IsMigCapable calls IsMigCapableFunc.
+func (mock *DeviceMock) IsMigCapable() (bool, error) {
+	if mock.IsMigCapableFunc == nil {
+		panic("DeviceMock.IsMigCapableFunc: method is nil but Device.IsMigCapable was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockIsMigCapable.Lock()
+	mock.calls.IsMigCapable = append(mock.calls.IsMigCapable, callInfo)
+	mock.lockIsMigCapable.Unlock()
+	return mock.IsMigCapableFunc()
+}
+
+// IsMigCapableCalls gets all the calls that were made to IsMigCapable.
+// Check the length with:
+//
+//	len(mockedDevice.IsMigCapableCalls())
+func (mock *DeviceMock) IsMigCapableCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockIsMigCapable.RLock()
+	calls = mock.calls.IsMigCapable
+	mock.lockIsMigCapable.RUnlock()
+	return calls
+}
+
+// IsMigEnabled calls IsMigEnabledFunc.
+func (mock *DeviceMock) IsMigEnabled() (bool, error) {
+	if mock.IsMigEnabledFunc == nil {
+		panic("DeviceMock.IsMigEnabledFunc: method is nil but Device.IsMigEnabled was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockIsMigEnabled.Lock()
+	mock.calls.IsMigEnabled = append(mock.calls.IsMigEnabled, callInfo)
+	mock.lockIsMigEnabled.Unlock()
+	return mock.IsMigEnabledFunc()
+}
+
+// IsMigEnabledCalls gets all the calls that were made to IsMigEnabled.
+// Check the length with:
+//
+//	len(mockedDevice.IsMigEnabledCalls())
+func (mock *DeviceMock) IsMigEnabledCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockIsMigEnabled.RLock()
+	calls = mock.calls.IsMigEnabled
+	mock.lockIsMigEnabled.RUnlock()
+	return calls
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/factory.go b/pkg/nvidia-plugin/pkg/resource/factory.go
new file mode 100644
index 000000000..88a1e1a05
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/factory.go
@@ -0,0 +1,84 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package resource
+
+import (
+	"fmt"
+
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+	"k8s.io/klog/v2"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+)
+
+// NewManager creates the Manager for the configured discovery strategy, or a Null manager when selection fails and FailOnInitError is unset.
+func NewManager(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, config *spec.Config) (Manager, error) {
+	selected, err := getManager(infolib, nvmllib, devicelib, *config.Flags.DeviceDiscoveryStrategy)
+	if err == nil {
+		return WithConfig(selected, config), nil
+	}
+	if *config.Flags.FailOnInitError {
+		return nil, err
+	}
+	klog.ErrorS(err, "using empty manager")
+	return NewNullManager(), nil
+}
+
+// WithConfig adapts a manager to the given config: unless FailOnInitError is set,
+// the manager is wrapped so that a failing Init falls back to a Null manager.
+func WithConfig(manager Manager, config *spec.Config) Manager {
+	if !*config.Flags.FailOnInitError {
+		return NewFallbackToNullOnInitError(manager)
+	}
+
+	return manager
+}
+
+// getManager picks the resource manager for the mode that resolveMode derives
+// from the requested strategy; any mode other than nvml, tegra or vfio is an error.
+func getManager(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, strategy string) (Manager, error) {
+	switch mode := resolveMode(infolib, strategy); mode {
+	case "nvml":
+		klog.Info("Using NVML manager")
+		return NewNVMLManager(nvmllib, devicelib), nil
+	case "tegra":
+		klog.Info("Using CUDA manager")
+		return NewCudaManager(), nil
+	case "vfio":
+		klog.Info("Using Vfio manager")
+		return NewVfioManager(), nil
+	default:
+		return nil, fmt.Errorf("unsupported strategy %v", mode)
+	}
+}
+
+func resolveMode(infolib info.Interface, strategy string) string {
+	explicit := strategy != "" && strategy != "auto"
+	if explicit {
+		return strategy // a concrete strategy was requested; the platform is not consulted
+	}
+
+	switch infolib.ResolvePlatform() {
+	case info.PlatformNVML, info.PlatformWSL:
+		return "nvml"
+	case info.PlatformTegra:
+		return "tegra"
+	}
+	return strategy // platform not matched above: "" or "auto" is handed back unchanged
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/fallback.go b/pkg/nvidia-plugin/pkg/resource/fallback.go
new file mode 100644
index 000000000..03f7db67e
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/fallback.go
@@ -0,0 +1,64 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package resource
+
+import (
+	"k8s.io/klog/v2"
+)
+
+type withFallBack struct {
+	wraps    Manager // manager every call is delegated to; replaced by fallback when Init fails
+	fallback Manager // substitute manager (a Null manager when built by NewFallbackToNullOnInitError)
+}
+
+// NewFallbackToNullOnInitError wraps m so that a failing Init replaces it with a
+// Null manager; the Null manager is created here, up front.
+func NewFallbackToNullOnInitError(m Manager) Manager {
+	wrapper := &withFallBack{wraps: m}
+	wrapper.fallback = NewNullManager()
+	return wrapper
+}
+
+// Init initializes the wrapped manager; on failure it logs a warning and switches to the fallback manager.
+func (m *withFallBack) Init() error {
+	if err := m.wraps.Init(); err != nil {
+		klog.Warningf("Failed to initialize resource manager: %v", err)
+		m.wraps = m.fallback
+	}
+	// The failure is deliberately not propagated: Init always reports success.
+	return nil
+}
+
+// Shutdown delegates to the current wrapped manager (the fallback manager once Init has failed).
+func (m *withFallBack) Shutdown() (err error) {
+	return m.wraps.Shutdown()
+}
+
+// GetDevices delegates to the current wrapped manager (the fallback manager once Init has failed).
+func (m *withFallBack) GetDevices() ([]Device, error) {
+	return m.wraps.GetDevices()
+}
+
+// GetCudaDriverVersion delegates to the current wrapped manager (the fallback manager once Init has failed).
+func (m *withFallBack) GetCudaDriverVersion() (int, int, error) {
+	return m.wraps.GetCudaDriverVersion()
+}
+
+// GetDriverVersion delegates to the current wrapped manager (the fallback manager once Init has failed).
+func (m *withFallBack) GetDriverVersion() (string, error) {
+	return m.wraps.GetDriverVersion()
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/fallback_test.go b/pkg/nvidia-plugin/pkg/resource/fallback_test.go
new file mode 100644
index 000000000..9a21fd852
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/fallback_test.go
@@ -0,0 +1,62 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package resource
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestFallback(t *testing.T) {
+	testCases := []struct {
+		initError     error
+		shutdownError error
+	}{
+		{
+			initError: fmt.Errorf("init failed"), // Init fails: the wrapper switches to the Null manager, so Shutdown returns nil
+		},
+		{
+			shutdownError: fmt.Errorf("should not be called"), // Init succeeds: Shutdown reaches the mock and this error is expected back
+		},
+	}
+
+	for _, tc := range testCases {
+		// Mock manager whose Init and Shutdown results are dictated by the current test case.
+		m := &ManagerMock{
+			InitFunc: func() error {
+				return tc.initError
+			},
+			ShutdownFunc: func() error {
+				return tc.shutdownError
+			},
+		}
+
+		f := NewFallbackToNullOnInitError(m)
+		// Init on the wrapper must succeed whether or not the wrapped Init failed.
+		require.NoError(t, f.Init())
+		// Shutdown is served by whichever manager is active after Init.
+		err := f.Shutdown()
+		if tc.shutdownError == nil {
+			require.NoError(t, err)
+		} else {
+			require.EqualError(t, err, tc.shutdownError.Error())
+		}
+
+	}
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/manager_mock.go b/pkg/nvidia-plugin/pkg/resource/manager_mock.go
new file mode 100644
index 000000000..3543e0a7b
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/manager_mock.go
@@ -0,0 +1,215 @@
+// Code generated by moq; DO NOT EDIT.
+// github.com/matryer/moq
+
+package resource
+
+import (
+	"sync"
+)
+
+// Ensure, that ManagerMock does implement Manager.
+// If this is not the case, regenerate this file with moq.
+var _ Manager = &ManagerMock{}
+
+// ManagerMock is a mock implementation of Manager.
+//
+//	func TestSomethingThatUsesManager(t *testing.T) {
+//
+//		// make and configure a mocked Manager
+//		mockedManager := &ManagerMock{
+//			GetCudaDriverVersionFunc: func() (int, int, error) {
+//				panic("mock out the GetCudaDriverVersion method")
+//			},
+//			GetDevicesFunc: func() ([]Device, error) {
+//				panic("mock out the GetDevices method")
+//			},
+//			GetDriverVersionFunc: func() (string, error) {
+//				panic("mock out the GetDriverVersion method")
+//			},
+//			InitFunc: func() error {
+//				panic("mock out the Init method")
+//			},
+//			ShutdownFunc: func() error {
+//				panic("mock out the Shutdown method")
+//			},
+//		}
+//
+//		// use mockedManager in code that requires Manager
+//		// and then make assertions.
+//
+//	}
+type ManagerMock struct {
+	// GetCudaDriverVersionFunc mocks the GetCudaDriverVersion method.
+	GetCudaDriverVersionFunc func() (int, int, error)
+
+	// GetDevicesFunc mocks the GetDevices method.
+	GetDevicesFunc func() ([]Device, error)
+
+	// GetDriverVersionFunc mocks the GetDriverVersion method.
+	GetDriverVersionFunc func() (string, error)
+
+	// InitFunc mocks the Init method.
+	InitFunc func() error
+
+	// ShutdownFunc mocks the Shutdown method.
+	ShutdownFunc func() error
+
+	// calls tracks calls to the methods.
+	calls struct {
+		// GetCudaDriverVersion holds details about calls to the GetCudaDriverVersion method.
+		GetCudaDriverVersion []struct {
+		}
+		// GetDevices holds details about calls to the GetDevices method.
+		GetDevices []struct {
+		}
+		// GetDriverVersion holds details about calls to the GetDriverVersion method.
+		GetDriverVersion []struct {
+		}
+		// Init holds details about calls to the Init method.
+		Init []struct {
+		}
+		// Shutdown holds details about calls to the Shutdown method.
+		Shutdown []struct {
+		}
+	}
+	lockGetCudaDriverVersion sync.RWMutex
+	lockGetDevices           sync.RWMutex
+	lockGetDriverVersion     sync.RWMutex
+	lockInit                 sync.RWMutex
+	lockShutdown             sync.RWMutex
+}
+
+// GetCudaDriverVersion calls GetCudaDriverVersionFunc.
+func (mock *ManagerMock) GetCudaDriverVersion() (int, int, error) {
+	if mock.GetCudaDriverVersionFunc == nil {
+		panic("ManagerMock.GetCudaDriverVersionFunc: method is nil but Manager.GetCudaDriverVersion was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockGetCudaDriverVersion.Lock()
+	mock.calls.GetCudaDriverVersion = append(mock.calls.GetCudaDriverVersion, callInfo)
+	mock.lockGetCudaDriverVersion.Unlock()
+	return mock.GetCudaDriverVersionFunc()
+}
+
+// GetCudaDriverVersionCalls gets all the calls that were made to GetCudaDriverVersion.
+// Check the length with:
+//
+//	len(mockedManager.GetCudaDriverVersionCalls())
+func (mock *ManagerMock) GetCudaDriverVersionCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockGetCudaDriverVersion.RLock()
+	calls = mock.calls.GetCudaDriverVersion
+	mock.lockGetCudaDriverVersion.RUnlock()
+	return calls
+}
+
+// GetDevices calls GetDevicesFunc.
+func (mock *ManagerMock) GetDevices() ([]Device, error) {
+	if mock.GetDevicesFunc == nil {
+		panic("ManagerMock.GetDevicesFunc: method is nil but Manager.GetDevices was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockGetDevices.Lock()
+	mock.calls.GetDevices = append(mock.calls.GetDevices, callInfo)
+	mock.lockGetDevices.Unlock()
+	return mock.GetDevicesFunc()
+}
+
+// GetDevicesCalls gets all the calls that were made to GetDevices.
+// Check the length with:
+//
+//	len(mockedManager.GetDevicesCalls())
+func (mock *ManagerMock) GetDevicesCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockGetDevices.RLock()
+	calls = mock.calls.GetDevices
+	mock.lockGetDevices.RUnlock()
+	return calls
+}
+
+// GetDriverVersion calls GetDriverVersionFunc.
+func (mock *ManagerMock) GetDriverVersion() (string, error) {
+	if mock.GetDriverVersionFunc == nil {
+		panic("ManagerMock.GetDriverVersionFunc: method is nil but Manager.GetDriverVersion was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockGetDriverVersion.Lock()
+	mock.calls.GetDriverVersion = append(mock.calls.GetDriverVersion, callInfo)
+	mock.lockGetDriverVersion.Unlock()
+	return mock.GetDriverVersionFunc()
+}
+
+// GetDriverVersionCalls gets all the calls that were made to GetDriverVersion.
+// Check the length with:
+//
+//	len(mockedManager.GetDriverVersionCalls())
+func (mock *ManagerMock) GetDriverVersionCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockGetDriverVersion.RLock()
+	calls = mock.calls.GetDriverVersion
+	mock.lockGetDriverVersion.RUnlock()
+	return calls
+}
+
+// Init calls InitFunc.
+func (mock *ManagerMock) Init() error {
+	if mock.InitFunc == nil {
+		panic("ManagerMock.InitFunc: method is nil but Manager.Init was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockInit.Lock()
+	mock.calls.Init = append(mock.calls.Init, callInfo)
+	mock.lockInit.Unlock()
+	return mock.InitFunc()
+}
+
+// InitCalls gets all the calls that were made to Init.
+// Check the length with:
+//
+//	len(mockedManager.InitCalls())
+func (mock *ManagerMock) InitCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockInit.RLock()
+	calls = mock.calls.Init
+	mock.lockInit.RUnlock()
+	return calls
+}
+
+// Shutdown calls ShutdownFunc.
+func (mock *ManagerMock) Shutdown() error {
+	if mock.ShutdownFunc == nil {
+		panic("ManagerMock.ShutdownFunc: method is nil but Manager.Shutdown was just called")
+	}
+	callInfo := struct {
+	}{}
+	mock.lockShutdown.Lock()
+	mock.calls.Shutdown = append(mock.calls.Shutdown, callInfo)
+	mock.lockShutdown.Unlock()
+	return mock.ShutdownFunc()
+}
+
+// ShutdownCalls gets all the calls that were made to Shutdown.
+// Check the length with:
+//
+//	len(mockedManager.ShutdownCalls())
+func (mock *ManagerMock) ShutdownCalls() []struct {
+} {
+	var calls []struct {
+	}
+	mock.lockShutdown.RLock()
+	calls = mock.calls.Shutdown
+	mock.lockShutdown.RUnlock()
+	return calls
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/null.go b/pkg/nvidia-plugin/pkg/resource/null.go
new file mode 100644
index 000000000..0955fc43b
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/null.go
@@ -0,0 +1,57 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package resource
+
+import (
+	"fmt"
+)
+
+type null struct{} // stateless Manager that exposes no devices
+
+var _ Manager = (*null)(nil) // compile-time check that *null satisfies Manager
+
+// NewNullManager returns a Manager that can be used when no operations are
+// required. It returns no devices, its Init and Shutdown methods are no-ops,
+// and its version queries return errors.
+func NewNullManager() Manager {
+	return &null{}
+}
+
+// Init does nothing and always returns nil for the null manager.
+func (l *null) Init() error {
+	return nil
+}
+
+// Shutdown does nothing and always returns nil for the null manager.
+func (l *null) Shutdown() (err error) {
+	return nil
+}
+
+// GetDevices returns a nil slice and a nil error: the null manager exposes no devices.
+func (l *null) GetDevices() ([]Device, error) {
+	return nil, nil
+}
+
+// GetCudaDriverVersion is not supported by the null manager and always returns (0, 0) with an error.
+func (l *null) GetCudaDriverVersion() (int, int, error) {
+	return 0, 0, fmt.Errorf("GetCudaDriverVersion is unsupported")
+}
+
+// GetDriverVersion is not supported by the null manager and always returns an empty string with an error.
+func (l *null) GetDriverVersion() (string, error) {
+	return "", fmt.Errorf("GetDriverVersion is unsupported")
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/nvml-device.go b/pkg/nvidia-plugin/pkg/resource/nvml-device.go
new file mode 100644
index 000000000..9b29dc7bd
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/nvml-device.go
@@ -0,0 +1,119 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package resource
+
+import (
+	"fmt"
+
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
+	"github.com/NVIDIA/go-nvlib/pkg/nvpci"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+
+	"github.com/google/uuid"
+)
+
+type nvmlDevice struct {
+	device.Device
+	devicelib device.Interface
+}
+
+var _ Device = (*nvmlDevice)(nil)
+
+// GetMigDevices returns the MIG devices configured on this device, each wrapped as an nvmlMigDevice
+func (d nvmlDevice) GetMigDevices() ([]Device, error) {
+	migs, err := d.Device.GetMigDevices()
+	if err != nil {
+		return nil, err
+	}
+
+	var devices []Device
+	for _, m := range migs {
+		mig := nvmlMigDevice{
+			MigDevice: m,
+			devicelib: d.devicelib,
+		}
+		devices = append(devices, mig)
+	}
+
+	return devices, nil
+}
+
+// GetCudaComputeCapability returns the major and minor CUDA compute capability of the device.
+func (d nvmlDevice) GetCudaComputeCapability() (int, int, error) {
+	major, minor, ret := d.Device.GetCudaComputeCapability()
+	if ret != nvml.SUCCESS {
+		return 0, 0, ret
+	}
+
+	return major, minor, nil
+}
+
+// GetAttributes is only supported for MIG devices.
+func (d nvmlDevice) GetAttributes() (map[string]interface{}, error) {
+	return nil, fmt.Errorf("GetAttributes is not supported for non-MIG devices")
+}
+
+// GetDeviceHandleFromMigDeviceHandle is only supported for MIG devices
+func (d nvmlDevice) GetDeviceHandleFromMigDeviceHandle() (Device, error) {
+	return nil, fmt.Errorf("GetDeviceHandleFromMigDeviceHandle is not supported for non-MIG devices")
+}
+
+// GetName returns the device name / model.
+func (d nvmlDevice) GetName() (string, error) {
+	name, ret := d.Device.GetName()
+	if ret != nvml.SUCCESS {
+		return "", ret
+	}
+	return name, nil
+}
+
+// GetTotalMemoryMB returns the total memory NVML reports for the device divided by 1024*1024 (integer division)
+func (d nvmlDevice) GetTotalMemoryMB() (uint64, error) {
+	info, ret := d.Device.GetMemoryInfo()
+	if ret != nvml.SUCCESS {
+		return 0, ret
+	}
+	return info.Total / (1024 * 1024), nil
+}
+
+func (d nvmlDevice) GetPCIClass() (uint32, error) {
+	pciBusID, err := d.GetPCIBusID()
+	if err != nil {
+		return 0, err
+	}
+	nvDevice, err := nvpci.New().GetGPUByPciBusID(pciBusID)
+	if err != nil {
+		return 0, err
+	}
+	return nvDevice.Class, nil
+}
+
+func (d nvmlDevice) GetFabricIDs() (string, string, error) {
+	info, ret := d.GetGpuFabricInfo()
+	if ret != nvml.SUCCESS {
+		return "", "", fmt.Errorf("failed to get GPU fabric info: %w", ret)
+	}
+
+	clusterUUID, err := uuid.FromBytes(info.ClusterUuid[:])
+	if err != nil {
+		return "", "", fmt.Errorf("invalid cluster UUID: %w", err)
+	}
+
+	cliqueId := fmt.Sprintf("%d", info.CliqueId)
+
+	return clusterUUID.String(), cliqueId, nil
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/nvml-lib.go b/pkg/nvidia-plugin/pkg/resource/nvml-lib.go
new file mode 100644
index 000000000..ad1d97216
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/nvml-lib.go
@@ -0,0 +1,94 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package resource
+
+import (
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+)
+
+type nvmlLib struct {
+	nvml.Interface
+	devicelib device.Interface
+}
+
+// NewNVMLManager creates a new manager that uses NVML to query and manage devices
+func NewNVMLManager(nvmllib nvml.Interface, devicelib device.Interface) Manager {
+	m := nvmlLib{
+		Interface: nvmllib,
+		devicelib: devicelib,
+	}
+	return m
+}
+
+// GetCudaDriverVersion returns the CUDA driver version reported by NVML as (major, minor)
+func (l nvmlLib) GetCudaDriverVersion() (int, int, error) {
+	v, ret := l.Interface.SystemGetCudaDriverVersion()
+	if ret != nvml.SUCCESS {
+		return 0, 0, ret
+	}
+	major := v / 1000      // v is decoded as major*1000 + minor*10
+	minor := v % 1000 / 10 // e.g. 12040 yields 12, 4
+
+	return major, minor, nil
+}
+
+// GetDevices returns the devices enumerated by devicelib, each wrapped as an nvmlDevice
+func (l nvmlLib) GetDevices() ([]Device, error) {
+	libdevices, err := l.devicelib.GetDevices()
+	if err != nil {
+		return nil, err
+	}
+
+	var devices []Device
+	for _, d := range libdevices {
+		gpu := nvmlDevice{
+			Device:    d,
+			devicelib: l.devicelib,
+		}
+		devices = append(devices, gpu)
+	}
+
+	return devices, nil
+}
+
+// GetDriverVersion returns the driver version
+func (l nvmlLib) GetDriverVersion() (string, error) {
+	v, ret := l.Interface.SystemGetDriverVersion()
+	if ret != nvml.SUCCESS {
+		return "", ret
+	}
+	return v, nil
+}
+
+// Init initialises the library
+func (l nvmlLib) Init() error {
+	ret := l.Interface.Init()
+	if ret != nvml.SUCCESS {
+		return ret
+	}
+	return nil
+}
+
+// Shutdown shuts down the library
+func (l nvmlLib) Shutdown() error {
+	ret := l.Interface.Shutdown()
+	if ret != nvml.SUCCESS {
+		return ret
+	}
+	return nil
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/nvml-mig-device.go b/pkg/nvidia-plugin/pkg/resource/nvml-mig-device.go
new file mode 100644
index 000000000..cf3d05300
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/nvml-mig-device.go
@@ -0,0 +1,152 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package resource
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
+	"github.com/NVIDIA/go-nvlib/pkg/nvpci"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+)
+
+type nvmlMigDevice struct {
+	device.MigDevice
+	devicelib device.Interface
+}
+
+var _ Device = (*nvmlMigDevice)(nil)
+
+// GetAttributes returns the attributes NVML reports for this MIG device as a map keyed by attribute name.
+func (d nvmlMigDevice) GetAttributes() (map[string]interface{}, error) {
+	attributes, ret := d.MigDevice.GetAttributes()
+	if ret != nvml.SUCCESS {
+		return nil, ret
+	}
+	a := map[string]interface{}{
+		"memory":          attributes.MemorySizeMB,
+		"multiprocessors": attributes.MultiprocessorCount,
+		"slices.gi":       attributes.GpuInstanceSliceCount,
+		"slices.ci":       attributes.ComputeInstanceSliceCount,
+		"engines.copy":    attributes.SharedCopyEngineCount,
+		"engines.decoder": attributes.SharedDecoderCount,
+		"engines.encoder": attributes.SharedEncoderCount,
+		"engines.jpeg":    attributes.SharedJpegCount,
+		"engines.ofa":     attributes.SharedOfaCount,
+	}
+
+	return a, nil
+}
+
+// GetDeviceHandleFromMigDeviceHandle wraps the device handle NVML returns for this MIG device as an nvmlDevice
+func (d nvmlMigDevice) GetDeviceHandleFromMigDeviceHandle() (Device, error) {
+	p, ret := d.MigDevice.GetDeviceHandleFromMigDeviceHandle()
+	if ret != nvml.SUCCESS {
+		return nil, ret
+	}
+
+	parentDevice, err := d.devicelib.NewDevice(p)
+	if err != nil {
+		return nil, fmt.Errorf("failed to construct device: %w", err)
+	}
+
+	parent := nvmlDevice{
+		Device:    parentDevice,
+		devicelib: d.devicelib,
+	}
+	return parent, nil
+}
+
+// IsMigCapable is not supported for MIG devices
+func (d nvmlMigDevice) IsMigCapable() (bool, error) {
+	return false, fmt.Errorf("IsMigCapable is not supported for MIG devices")
+}
+
+// IsMigEnabled is not supported for MIG devices
+func (d nvmlMigDevice) IsMigEnabled() (bool, error) {
+	return false, fmt.Errorf("IsMigEnabled is not supported for MIG devices")
+}
+
+// GetMigDevices is not supported for MIG devices
+func (d nvmlMigDevice) GetMigDevices() ([]Device, error) {
+	return nil, fmt.Errorf("GetMigDevices is not implemented for MIG devices")
+}
+
+// GetCudaComputeCapability is not supported for MIG devices
+func (d nvmlMigDevice) GetCudaComputeCapability() (int, int, error) {
+	return 0, 0, fmt.Errorf("GetCudaComputeCapability is not supported for MIG devices")
+}
+
+// GetName returns the name of the nvmlMigDevice.
+// This is the MIG profile string with every "+" replaced by ".".
+func (d nvmlMigDevice) GetName() (string, error) {
+	p, err := d.MigDevice.GetProfile()
+	if err != nil {
+		return "", fmt.Errorf("failed to get MIG profile: %w", err)
+	}
+
+	resourceName := strings.ReplaceAll(p.String(), "+", ".")
+	return resourceName, nil
+}
+
+// GetTotalMemoryMB returns the total memory on a device in MB
+func (d nvmlMigDevice) GetTotalMemoryMB() (uint64, error) {
+	attr, err := d.GetAttributes()
+	if err != nil {
+		return 0, err
+	}
+
+	total, err := totalMemory(attr)
+	if err != nil {
+		return 0, err
+	}
+	return total, nil
+}
+
+func totalMemory(attr map[string]interface{}) (uint64, error) {
+	raw, ok := attr["memory"]
+	if !ok {
+		return 0, fmt.Errorf("no 'memory' attribute available")
+	}
+	// Only uint64 and non-negative int values are accepted; any other dynamic type is reported by name.
+	switch value := raw.(type) {
+	case uint64:
+		return value, nil
+	case int:
+		if value < 0 {
+			return 0, fmt.Errorf("unexpected memory value %v", value)
+		}
+		//nolint:gosec  // Here we are sure that the value will fit in memory and be positive.
+		return uint64(value), nil
+	default:
+		return 0, fmt.Errorf("unsupported attribute type %T", value)
+	}
+}
+
+func (d nvmlMigDevice) GetPCIClass() (uint32, error) {
+	// GPU devices that support MIG do not support switching mode between graphics and compute, so they are always in compute mode.
+	return nvpci.PCI3dControllerClass, nil
+}
+
+func (d nvmlMigDevice) IsFabricAttached() (bool, error) {
+	return false, fmt.Errorf("IsFabricAttached is not supported for MIG devices")
+}
+
+func (d nvmlMigDevice) GetFabricIDs() (string, string, error) {
+	return "", "", fmt.Errorf("GetFabricIDs is not supported for MIG devices")
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/sysfs-device.go b/pkg/nvidia-plugin/pkg/resource/sysfs-device.go
new file mode 100644
index 000000000..a3097a108
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/sysfs-device.go
@@ -0,0 +1,77 @@
+/**
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package resource
+
+import (
+	"fmt"
+
+	"github.com/NVIDIA/go-nvlib/pkg/nvpci"
+)
+
+type vfioDevice struct {
+	nvidiaPCIDevice *nvpci.NvidiaPCIDevice
+}
+
+// GetMigDevices always returns a nil slice and a nil error for vfio devices
+func (d vfioDevice) GetMigDevices() ([]Device, error) {
+	return nil, nil
+}
+
+// GetCudaComputeCapability is not supported for GPU devices with vfio pci driver; it returns -1, -1 and a nil error.
+func (d vfioDevice) GetCudaComputeCapability() (int, int, error) {
+	return -1, -1, nil
+}
+
+// GetAttributes is only supported for MIG devices.
+func (d vfioDevice) GetAttributes() (map[string]interface{}, error) {
+	return nil, fmt.Errorf("GetAttributes is not supported for non-MIG devices")
+}
+
+// GetDeviceHandleFromMigDeviceHandle is only supported for MIG devices
+func (d vfioDevice) GetDeviceHandleFromMigDeviceHandle() (Device, error) {
+	return nil, fmt.Errorf("GetDeviceHandleFromMigDeviceHandle is not supported for non-MIG devices")
+}
+
+// GetName returns the device name / model.
+func (d vfioDevice) GetName() (string, error) {
+	return d.nvidiaPCIDevice.DeviceName, nil
+}
+
+// GetTotalMemoryMB returns the second result of GetTotalAddressableMemory(true) unmodified. NOTE(review): confirm that value is in MB.
+func (d vfioDevice) GetTotalMemoryMB() (uint64, error) {
+	_, val := d.nvidiaPCIDevice.Resources.GetTotalAddressableMemory(true)
+	return val, nil
+}
+
+func (d vfioDevice) IsMigEnabled() (bool, error) {
+	return false, nil
+}
+
+func (d vfioDevice) IsMigCapable() (bool, error) {
+	return false, nil
+}
+
+func (d vfioDevice) GetPCIClass() (uint32, error) {
+	return d.nvidiaPCIDevice.Class, nil
+}
+
+func (d vfioDevice) IsFabricAttached() (bool, error) {
+	return false, nil
+}
+func (d vfioDevice) GetFabricIDs() (string, string, error) {
+	return "", "", fmt.Errorf("GetFabricIDs is not supported for vfio devices")
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/sysfs-lib.go b/pkg/nvidia-plugin/pkg/resource/sysfs-lib.go
new file mode 100644
index 000000000..739d98351
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/sysfs-lib.go
@@ -0,0 +1,74 @@
+/**
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package resource
+
+import (
+	"github.com/NVIDIA/go-nvlib/pkg/nvpci"
+	"k8s.io/klog/v2"
+)
+
+type vfioLib struct {
+	nvpcilib nvpci.Interface
+}
+
+// NewVfioManager returns a resource manager for devices with the VFIO PCI driver
+func NewVfioManager() Manager {
+	nvpcilib := nvpci.New()
+	manager := vfioLib{
+		nvpcilib: nvpcilib,
+	}
+	return &manager
+}
+
+// Init is a no-op for the vfio manager
+func (l *vfioLib) Init() error {
+	return nil
+}
+
+// Shutdown is a no-op for the vfio manager
+func (l *vfioLib) Shutdown() (err error) {
+	return nil
+}
+
+// GetDevices returns the devices with VFIO PCI driver available on the system
+func (l *vfioLib) GetDevices() ([]Device, error) {
+	var devices []Device
+	nvdevices, err := l.nvpcilib.GetGPUs()
+	if err != nil {
+		return nil, err
+	}
+
+	for _, dev := range nvdevices {
+		if dev.Driver == "vfio-pci" {
+			vfioDev := vfioDevice{dev}
+			devices = append(devices, vfioDev)
+		} else {
+			klog.Infof("Device not bound to 'vfio-pci'; device: %s driver: '%s'", dev.Address, dev.Driver)
+		}
+	}
+	return devices, nil
+}
+
+// GetCudaDriverVersion is not supported for the vfio manager; it returns 0, 0 and a nil error
+func (l *vfioLib) GetCudaDriverVersion() (int, int, error) {
+	return 0, 0, nil
+}
+
+// GetDriverVersion is not supported for the vfio manager; it returns a fixed placeholder string and a nil error
+func (l *vfioLib) GetDriverVersion() (string, error) {
+	return "unknown.unknown.unknown", nil
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/testing/resource-testing.go b/pkg/nvidia-plugin/pkg/resource/testing/resource-testing.go
new file mode 100644
index 000000000..968183816
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/testing/resource-testing.go
@@ -0,0 +1,141 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package testing
+
+import (
+	"fmt"
+
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/resource"
+)
+
+// DeviceMock embeds resource.DeviceMock so that additional helper methods can be defined on it.
+type DeviceMock struct {
+	resource.DeviceMock
+}
+
+// NewFullGPU creates a device that can be treated as a full GPU for testing
+func NewFullGPU() resource.Device {
+	return NewDeviceMock(false)
+}
+
+// NewMigEnabledDevice creates a GPU with MIG enabled and the specified MIG devices
+func NewMigEnabledDevice(migs ...*resource.DeviceMock) resource.Device {
+	return NewDeviceMock(true).WithMigDevices(migs...)
+}
+
+// NewDeviceMock creates a device for testing which can have MIG enabled or disabled.
+func NewDeviceMock(migEnabled bool) *DeviceMock {
+	d := DeviceMock{resource.DeviceMock{
+		GetNameFunc: func() (string, error) { return "MOCKMODEL", nil },
+		GetCudaComputeCapabilityFunc: func() (int, int, error) {
+			if migEnabled {
+				return 0, 0, nil
+			}
+			return 8, 0, nil
+		},
+		GetTotalMemoryMBFunc: func() (uint64, error) { return uint64(300), nil },
+		IsFabricAttachedFunc: func() (bool, error) { return false, nil },
+		IsMigEnabledFunc:     func() (bool, error) { return migEnabled, nil },
+		IsMigCapableFunc:     func() (bool, error) { return migEnabled, nil },
+		GetMigDevicesFunc:    func() ([]resource.Device, error) { return nil, nil },
+		GetPCIClassFunc:      func() (uint32, error) { return 0, nil },
+	}}
+	return &d
+}
+
+func NewDeviceWithPCIClassMock(pciClass uint32) *DeviceMock {
+	d := DeviceMock{resource.DeviceMock{
+		GetPCIClassFunc: func() (uint32, error) { return pciClass, nil },
+	}}
+	return &d
+}
+
+// NewMigDevice creates a MIG device with the specified attributes for testing
+func NewMigDevice(gi int, ci int, gb uint64, attributes ...map[string]interface{}) *resource.DeviceMock {
+	// Defaults; every map passed in attributes overrides them key by key, in argument order.
+	defaultAttributes := map[string]interface{}{
+		"memory":          gb,
+		"multiprocessors": 0,
+		"slices.gi":       gi,
+		"slices.ci":       ci,
+		"engines.copy":    0,
+		"engines.decoder": 0,
+		"engines.encoder": 0,
+		"engines.jpeg":    0,
+		"engines.ofa":     0,
+	}
+	for _, attr := range attributes {
+		for a, v := range attr {
+			defaultAttributes[a] = v
+		}
+	}
+
+	return &resource.DeviceMock{
+		GetNameFunc:       func() (string, error) { return fmt.Sprintf("%dg.%dgb", gi, gb), nil },
+		GetAttributesFunc: func() (map[string]interface{}, error) { return defaultAttributes, nil },
+	}
+}
+
+// WithMigDevices adds the specified MIG devices to the mocked device
+func (d *DeviceMock) WithMigDevices(migs ...*resource.DeviceMock) *DeviceMock {
+	for _, m := range migs {
+		m.GetDeviceHandleFromMigDeviceHandleFunc = func() (resource.Device, error) {
+			return d, nil
+		}
+	}
+	d.GetMigDevicesFunc = func() ([]resource.Device, error) {
+		var devices []resource.Device
+		for _, m := range migs {
+			devices = append(devices, m)
+		}
+		return devices, nil
+	}
+
+	return d
+}
+
+// ManagerMock embeds resource.ManagerMock so that additional helper methods can be defined on it.
+type ManagerMock struct {
+	resource.ManagerMock
+}
+
+// NewManagerMockWithDevices creates a mocked manager with the specified devices
+func NewManagerMockWithDevices(devices ...resource.Device) *ManagerMock {
+	manager := ManagerMock{resource.ManagerMock{
+		InitFunc:     func() error { return nil },
+		ShutdownFunc: func() error { return nil },
+		GetDriverVersionFunc: func() (string, error) {
+			return "400.300", nil
+		},
+		GetDevicesFunc: func() ([]resource.Device, error) {
+			return devices, nil
+		},
+		GetCudaDriverVersionFunc: func() (int, int, error) {
+			return 8, 0, nil
+		},
+	}}
+	return &manager
+}
+
+// WithErrorOnInit makes Init on the ManagerMock print and return err when called.
+func (m *ManagerMock) WithErrorOnInit(err error) *ManagerMock {
+	m.InitFunc = func() error {
+		fmt.Printf("returning error = %v\n", err)
+		return err
+	}
+	return m
+}
diff --git a/pkg/nvidia-plugin/pkg/resource/types.go b/pkg/nvidia-plugin/pkg/resource/types.go
new file mode 100644
index 000000000..dc6fa6a77
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/resource/types.go
@@ -0,0 +1,45 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package resource
+
+// Manager defines an interface for managing devices
+//
+//go:generate moq -rm -out manager_mock.go . Manager
+type Manager interface {
+	Init() error
+	Shutdown() error
+	GetDevices() ([]Device, error)
+	GetDriverVersion() (string, error)
+	GetCudaDriverVersion() (int, int, error)
+}
+
+// Device defines an interface for a device with which labels are associated
+//
+//go:generate moq -out device_mock.go . Device
+type Device interface {
+	IsFabricAttached() (bool, error)
+	IsMigEnabled() (bool, error)
+	IsMigCapable() (bool, error)
+	GetMigDevices() ([]Device, error)
+	GetAttributes() (map[string]interface{}, error)
+	GetName() (string, error)
+	GetTotalMemoryMB() (uint64, error)
+	GetDeviceHandleFromMigDeviceHandle() (Device, error)
+	GetCudaComputeCapability() (int, int, error)
+	GetPCIClass() (uint32, error)
+	GetFabricIDs() (string, string, error)
+}
diff --git a/pkg/nvidia-plugin/pkg/rm/allocate.go b/pkg/nvidia-plugin/pkg/rm/allocate.go
new file mode 100644
index 000000000..166b68e84
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/rm/allocate.go
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package rm
+
+import (
+	"fmt"
+	"sort"
+)
+
+// distributedAlloc returns a list of devices such that any replicated
+// devices are distributed across all replicated GPUs equally. It takes into
+// account already allocated replicas to ensure a proper balance across them.
+func (r *resourceManager) distributedAlloc(available, required []string, size int) ([]string, error) {
+	// Get the set of candidate devices as the difference between available and required.
+	candidates := r.devices.Subset(available).Difference(r.devices.Subset(required)).GetIDs()
+	needed := size - len(required)
+
+	if len(candidates) < needed {
+		return nil, fmt.Errorf("not enough available devices to satisfy allocation")
+	}
+
+	// For each candidate device, build a mapping of (stripped) device ID to
+	// total / available replicas for that device.
+	replicas := make(map[string]*struct{ total, available int })
+	for _, c := range candidates {
+		id := AnnotatedID(c).GetID()
+		if _, exists := replicas[id]; !exists {
+			replicas[id] = &struct{ total, available int }{}
+		}
+		replicas[id].available++
+	}
+	for d := range r.devices {
+		id := AnnotatedID(d).GetID()
+		if _, exists := replicas[id]; !exists {
+			continue
+		}
+		replicas[id].total++
+	}
+
+	// Grab the set of 'needed' devices one-by-one from the candidates list.
+	// Before selecting each candidate, first sort the candidate list using the
+	// replicas map above. After sorting, the first element in the list will
+	// contain the device with the least difference between total and available
+	// replications (based on what's already been allocated). Add this device
+	// to the list of devices to allocate, remove it from the candidate list,
+	// decrement its available count in the replicas map, and repeat.
+	var devices []string
+	for i := 0; i < needed; i++ {
+		sort.Slice(candidates, func(i, j int) bool {
+			iid := AnnotatedID(candidates[i]).GetID()
+			jid := AnnotatedID(candidates[j]).GetID()
+			idiff := replicas[iid].total - replicas[iid].available
+			jdiff := replicas[jid].total - replicas[jid].available
+			return idiff < jdiff
+		})
+		id := AnnotatedID(candidates[0]).GetID()
+		replicas[id].available--
+		devices = append(devices, candidates[0])
+		candidates = candidates[1:]
+	}
+
+	// Prepend the required devices; capping the slice makes append copy instead of writing into the caller's backing array.
+	devices = append(required[:len(required):len(required)], devices...)
+
+	return devices, nil
+}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/device_map.go b/pkg/nvidia-plugin/pkg/rm/device_map.go
similarity index 73%
rename from pkg/device-plugin/nvidiadevice/nvinternal/rm/device_map.go
rename to pkg/nvidia-plugin/pkg/rm/device_map.go
index 4b6a43c6e..1dbc439bd 100644
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/device_map.go
+++ b/pkg/nvidia-plugin/pkg/rm/device_map.go
@@ -1,61 +1,59 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
 
 package rm
 
 import (
 	"fmt"
 
-	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
-
 	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
-	"github.com/NVIDIA/go-nvlib/pkg/nvml"
-	spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+	"k8s.io/klog/v2"
+
+	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
 )
 
 type deviceMapBuilder struct {
 	device.Interface
-	config *nvidia.DeviceConfig
+	migStrategy         *string
+	resources           *spec.Resources
+	replicatedResources *spec.ReplicatedResources
+
+	newGPUDevice func(i int, gpu nvml.Device) (string, deviceInfo)
 }
 
 // DeviceMap stores a set of devices per resource name.
 type DeviceMap map[spec.ResourceName]Devices
 
 // NewDeviceMap creates a device map for the specified NVML library and config.
-func NewDeviceMap(nvmllib nvml.Interface, config *nvidia.DeviceConfig) (DeviceMap, error) {
+func NewDeviceMap(infolib info.Interface, devicelib device.Interface, config *nvidia.DeviceConfig) (DeviceMap, error) {
 	b := deviceMapBuilder{
-		Interface: device.New(device.WithNvml(nvmllib)),
-		config:    config,
+		Interface:           devicelib,
+		migStrategy:         config.Flags.MigStrategy,
+		resources:           &config.Resources,
+		replicatedResources: config.Sharing.ReplicatedResources(),
+		newGPUDevice:        newNvmlGPUDevice,
 	}
+
+	if infolib.ResolvePlatform() == info.PlatformWSL {
+		b.newGPUDevice = newWslGPUDevice
+	}
+
 	return b.build()
 }
 
@@ -65,9 +63,9 @@ func (b *deviceMapBuilder) build() (DeviceMap, error) {
 	if err != nil {
 		return nil, fmt.Errorf("error building device map from config.resources: %v", err)
 	}
-	devices, err = updateDeviceMapWithReplicas(b.config, devices)
+	devices, err = updateDeviceMapWithReplicas(b.replicatedResources, devices)
 	if err != nil {
-		return nil, fmt.Errorf("error updating device map with replicas from config.sharing.timeSlicing.resources: %v", err)
+		return nil, fmt.Errorf("error updating device map with replicas from replicatedResources config: %v", err)
 	}
 	return devices, nil
 }
@@ -79,7 +77,7 @@ func (b *deviceMapBuilder) buildDeviceMapFromConfigResources() (DeviceMap, error
 		return nil, fmt.Errorf("error building GPU device map: %v", err)
 	}
 
-	if *b.config.Flags.MigStrategy == spec.MigStrategyNone {
+	if *b.migStrategy == spec.MigStrategyNone {
 		return deviceMap, nil
 	}
 
@@ -89,7 +87,7 @@ func (b *deviceMapBuilder) buildDeviceMapFromConfigResources() (DeviceMap, error
 	}
 
 	var requireUniformMIGDevices bool
-	if *b.config.Flags.MigStrategy == spec.MigStrategySingle {
+	if *b.migStrategy == spec.MigStrategySingle {
 		requireUniformMIGDevices = true
 	}
 
@@ -111,7 +109,7 @@ func (b *deviceMapBuilder) buildDeviceMapFromConfigResources() (DeviceMap, error
 func (b *deviceMapBuilder) buildGPUDeviceMap() (DeviceMap, error) {
 	devices := make(DeviceMap)
 
-	b.VisitDevices(func(i int, gpu device.Device) error {
+	err := b.VisitDevices(func(i int, gpu device.Device) error {
 		name, ret := gpu.GetName()
 		if ret != nvml.SUCCESS {
 			return fmt.Errorf("error getting product name for GPU: %v", ret)
@@ -120,18 +118,18 @@ func (b *deviceMapBuilder) buildGPUDeviceMap() (DeviceMap, error) {
 		if err != nil {
 			return fmt.Errorf("error checking if MIG is enabled on GPU: %v", err)
 		}
-		if migEnabled && *b.config.Flags.MigStrategy != spec.MigStrategyNone {
+		if migEnabled && *b.migStrategy != spec.MigStrategyNone {
 			return nil
 		}
-		for _, resource := range b.config.Resources.GPUs {
+		for _, resource := range b.resources.GPUs {
 			if resource.Pattern.Matches(name) {
-				index, info := newGPUDevice(i, gpu)
+				index, info := b.newGPUDevice(i, gpu)
 				return devices.setEntry(resource.Name, index, info)
 			}
 		}
 		return fmt.Errorf("GPU name '%v' does not match any resource patterns", name)
 	})
-	return devices, nil
+	return devices, err
 }
 
 // buildMigDeviceMap builds a map of resource names to MIG devices
@@ -142,7 +140,7 @@ func (b *deviceMapBuilder) buildMigDeviceMap() (DeviceMap, error) {
 		if err != nil {
 			return fmt.Errorf("error getting MIG profile for MIG device at index '(%v, %v)': %v", i, j, err)
 		}
-		for _, resource := range b.config.Resources.MIGs {
+		for _, resource := range b.resources.MIGs {
 			if resource.Pattern.Matches(migProfile.String()) {
 				index, info := newMigDevice(i, j, mig)
 				return devices.setEntry(resource.Name, index, info)
@@ -168,9 +166,11 @@ func (b *deviceMapBuilder) assertAllMigDevicesAreValid(uniform bool) error {
 		if err != nil {
 			return err
 		}
-		if len(migDevices) == 0 {
-			i := 0
-			return fmt.Errorf("device %v has an invalid MIG configuration", i)
+		if uniform && len(migDevices) == 0 {
+			return fmt.Errorf("device %v has no MIG devices configured", i)
+		}
+		if !uniform && len(migDevices) == 0 {
+			klog.Warningf("device %v has no MIG devices configured", i)
 		}
 		return nil
 	})
@@ -198,9 +198,9 @@ func (b *deviceMapBuilder) assertAllMigDevicesAreValid(uniform bool) error {
 	})
 }
 
-// setEntry sets the DeviceMap entry for the specified resource.
-func (d DeviceMap) setEntry(name spec.ResourceName, index string, info deviceInfo) error {
-	dev, err := BuildDevice(index, info)
+// setEntry sets the DeviceMap entry for the specified resource
+func (d DeviceMap) setEntry(name spec.ResourceName, index string, device deviceInfo) error {
+	dev, err := BuildDevice(index, device)
 	if err != nil {
 		return fmt.Errorf("error building Device: %v", err)
 	}
@@ -280,13 +280,14 @@ func (d DeviceMap) getIDsOfDevicesToReplicate(r *spec.ReplicatedResource) ([]str
 	return nil, fmt.Errorf("unexpected error")
 }
 
-// updateDeviceMapWithReplicas returns an updated map of resource names to devices with replica information from spec.Config.Sharing.TimeSlicing.Resources
-func updateDeviceMapWithReplicas(config *nvidia.DeviceConfig, oDevices DeviceMap) (DeviceMap, error) {
+// updateDeviceMapWithReplicas returns an updated map of resource names to devices with replica
+// information from the active replicated resources config.
+func updateDeviceMapWithReplicas(replicatedResources *spec.ReplicatedResources, oDevices DeviceMap) (DeviceMap, error) {
 	devices := make(DeviceMap)
 
-	// Begin by walking config.Sharing.TimeSlicing.Resources and building a map of just the resource names.
+	// Begin by walking replicatedResources.Resources and building a map of just the resource names.
 	names := make(map[spec.ResourceName]bool)
-	for _, r := range config.Sharing.TimeSlicing.Resources {
+	for _, r := range replicatedResources.Resources {
 		names[r.Name] = true
 	}
 
@@ -297,8 +298,9 @@ func updateDeviceMapWithReplicas(config *nvidia.DeviceConfig, oDevices DeviceMap
 		}
 	}
 
-	// Walk TimeSlicing.Resources and update devices in the device map as appropriate.
-	for _, r := range config.Sharing.TimeSlicing.Resources {
+	// Walk shared Resources and update devices in the device map as appropriate.
+	for _, resource := range replicatedResources.Resources {
+		r := resource
 		// Get the IDs of the devices we want to replicate from oDevices
 		ids, err := oDevices.getIDsOfDevicesToReplicate(&r)
 		if err != nil {
@@ -325,6 +327,7 @@ func updateDeviceMapWithReplicas(config *nvidia.DeviceConfig, oDevices DeviceMap
 				annotatedID := string(NewAnnotatedID(id, i))
 				replicatedDevice := *(oDevices[r.Name][id])
 				replicatedDevice.ID = annotatedID
+				replicatedDevice.Replicas = r.Replicas
 				devices.insert(name, &replicatedDevice)
 			}
 		}
diff --git a/pkg/nvidia-plugin/pkg/rm/device_map_test.go b/pkg/nvidia-plugin/pkg/rm/device_map_test.go
new file mode 100644
index 000000000..61b0056b2
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/rm/device_map_test.go
@@ -0,0 +1,109 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package rm
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+)
+
+func TestDeviceMapInsert(t *testing.T) {
+	device0 := Device{Device: pluginapi.Device{ID: "0"}}
+	device0withIndex := Device{Device: pluginapi.Device{ID: "0"}, Index: "index"}
+	device1 := Device{Device: pluginapi.Device{ID: "1"}}
+
+	testCases := []struct {
+		description       string
+		deviceMap         DeviceMap
+		key               string
+		value             *Device
+		expectedDeviceMap DeviceMap
+	}{
+		{
+			description: "insert into empty map",
+			deviceMap:   make(DeviceMap),
+			key:         "resource",
+			value:       &device0,
+			expectedDeviceMap: DeviceMap{
+				"resource": Devices{
+					"0": &device0,
+				},
+			},
+		},
+		{
+			description: "add to existing resource",
+			deviceMap: DeviceMap{
+				"resource": Devices{
+					"0": &device0,
+				},
+			},
+			key:   "resource",
+			value: &device1,
+			expectedDeviceMap: DeviceMap{
+				"resource": Devices{
+					"0": &device0,
+					"1": &device1,
+				},
+			},
+		},
+		{
+			description: "add new resource",
+			deviceMap: DeviceMap{
+				"resource": Devices{
+					"0": &device0,
+				},
+			},
+			key:   "resource1",
+			value: &device0,
+			expectedDeviceMap: DeviceMap{
+				"resource": Devices{
+					"0": &device0,
+				},
+				"resource1": Devices{
+					"0": &device0,
+				},
+			},
+		},
+		{
+			description: "overwrite existing device",
+			deviceMap: DeviceMap{
+				"resource": Devices{
+					"0": &device0,
+				},
+			},
+			key:   "resource",
+			value: &device0withIndex,
+			expectedDeviceMap: DeviceMap{
+				"resource": Devices{
+					"0": &device0withIndex,
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			tc.deviceMap.insert(spec.ResourceName(tc.key), tc.value)
+
+			require.EqualValues(t, tc.expectedDeviceMap, tc.deviceMap)
+		})
+	}
+}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/devices.go b/pkg/nvidia-plugin/pkg/rm/devices.go
similarity index 71%
rename from pkg/device-plugin/nvidiadevice/nvinternal/rm/devices.go
rename to pkg/nvidia-plugin/pkg/rm/devices.go
index 668404455..f3b77c5fb 100644
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/devices.go
+++ b/pkg/nvidia-plugin/pkg/rm/devices.go
@@ -1,33 +1,17 @@
 /*
- * SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.  All rights reserved.
  *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 package rm
@@ -37,14 +21,19 @@ import (
 	"strconv"
 	"strings"
 
-	kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
 )
 
-// Device wraps kubeletdevicepluginv1beta1.Device with extra metadata and functions.
+// Device wraps pluginapi.Device with extra metadata and functions.
 type Device struct {
-	kubeletdevicepluginv1beta1.Device
-	Paths []string
-	Index string
+	pluginapi.Device
+	Paths             []string
+	Index             string
+	TotalMemory       uint64
+	ComputeCapability string
+	// Replicas stores the total number of times this device is replicated.
+	// If this is 0 or 1 then the device is not shared.
+	Replicas int
 }
 
 // deviceInfo defines the information the required to construct a Device
@@ -52,6 +41,8 @@ type deviceInfo interface {
 	GetUUID() (string, error)
 	GetPaths() ([]string, error)
 	GetNumaNode() (bool, int, error)
+	GetTotalMemory() (uint64, error)
+	GetComputeCapability() (string, error)
 }
 
 // Devices wraps a map[string]*Device with some functions.
@@ -80,14 +71,27 @@ func BuildDevice(index string, d deviceInfo) (*Device, error) {
 		return nil, fmt.Errorf("error getting device NUMA node: %v", err)
 	}
 
-	dev := Device{}
+	totalMemory, err := d.GetTotalMemory()
+	if err != nil {
+		return nil, fmt.Errorf("error getting device memory: %w", err)
+	}
+
+	computeCapability, err := d.GetComputeCapability()
+	if err != nil {
+		return nil, fmt.Errorf("error getting device compute capability: %w", err)
+	}
+
+	dev := Device{
+		TotalMemory:       totalMemory,
+		ComputeCapability: computeCapability,
+	}
 	dev.ID = uuid
 	dev.Index = index
 	dev.Paths = paths
-	dev.Health = kubeletdevicepluginv1beta1.Healthy
+	dev.Health = pluginapi.Healthy
 	if hasNuma {
-		dev.Topology = &kubeletdevicepluginv1beta1.TopologyInfo{
-			Nodes: []*kubeletdevicepluginv1beta1.NUMANode{
+		dev.Topology = &pluginapi.TopologyInfo{
+			Nodes: []*pluginapi.NUMANode{
 				{
 					ID: int64(numa),
 				},
@@ -155,28 +159,28 @@ func (ds Devices) GetIDs() []string {
 	return res
 }
 
-// GetPluginDevices returns the plugin Devices from all devices in the Devices
-func (ds Devices) GetPluginDevices(count uint) []*kubeletdevicepluginv1beta1.Device {
-	var res []*kubeletdevicepluginv1beta1.Device
-
-	if !strings.Contains(ds.GetIDs()[0], "MIG") {
-		for _, dev := range ds {
-			for i := uint(0); i < count; i++ {
-				id := fmt.Sprintf("%v-%v", dev.ID, i)
-				res = append(res, &kubeletdevicepluginv1beta1.Device{
-					ID:       id,
-					Health:   dev.Health,
-					Topology: nil,
-				})
-			}
-		}
-	} else {
-		for _, d := range ds {
-			res = append(res, &d.Device)
+// GetUUIDs returns the unique UUIDs associated with the devices in the set.
+func (ds Devices) GetUUIDs() []string {
+	var res []string
+	seen := make(map[string]bool)
+	for _, d := range ds {
+		uuid := d.GetUUID()
+		if seen[uuid] {
+			continue
 		}
-
+		seen[uuid] = true
+		res = append(res, uuid)
 	}
+	return res
+}
 
+// GetPluginDevices returns the plugin Devices from all devices in the Devices
+func (ds Devices) GetPluginDevices() []*pluginapi.Device {
+	var res []*pluginapi.Device
+	for _, device := range ds {
+		d := device
+		res = append(res, &d.Device)
+	}
 	return res
 }
 
@@ -198,7 +202,7 @@ func (ds Devices) GetPaths() []string {
 	return res
 }
 
-// AlignedAllocationSupported checks whether all devices support an alligned allocation
+// AlignedAllocationSupported checks whether all devices support an aligned allocation
 func (ds Devices) AlignedAllocationSupported() bool {
 	for _, d := range ds {
 		if !d.AlignedAllocationSupported() {
@@ -208,7 +212,7 @@ func (ds Devices) AlignedAllocationSupported() bool {
 	return true
 }
 
-// AlignedAllocationSupported checks whether the device supports an alligned allocation
+// AlignedAllocationSupported checks whether the device supports an aligned allocation
 func (d Device) AlignedAllocationSupported() bool {
 	if d.IsMigDevice() {
 		return false
@@ -241,10 +245,7 @@ func NewAnnotatedID(id string, replica int) AnnotatedID {
 // HasAnnotations checks if an AnnotatedID has any annotations or not.
 func (r AnnotatedID) HasAnnotations() bool {
 	split := strings.SplitN(string(r), "::", 2)
-	if len(split) != 2 {
-		return false
-	}
-	return true
+	return len(split) == 2
 }
 
 // Split splits a AnnotatedID into its ID and replica number parts.
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/health.go b/pkg/nvidia-plugin/pkg/rm/health.go
similarity index 81%
rename from pkg/device-plugin/nvidiadevice/nvinternal/rm/health.go
rename to pkg/nvidia-plugin/pkg/rm/health.go
index 8dce3cc07..3a308ff3e 100644
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/health.go
+++ b/pkg/nvidia-plugin/pkg/rm/health.go
@@ -1,33 +1,17 @@
 /*
- * SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.  All rights reserved.
  *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 package rm
@@ -38,7 +22,7 @@ import (
 	"strconv"
 	"strings"
 
-	"github.com/NVIDIA/go-nvlib/pkg/nvml"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
 	"k8s.io/klog/v2"
 )
 
@@ -49,9 +33,6 @@ const (
 	// this is in addition to the Application errors that are already ignored.
 	envDisableHealthChecks = "DP_DISABLE_HEALTHCHECKS"
 	allHealthChecks        = "xids"
-
-	// maxSuccessiveEventErrorCount sets the number of errors waiting for events before marking all devices as unhealthy.
-	maxSuccessiveEventErrorCount = 3
 )
 
 // CheckHealth performs health checks on a set of devices, writing to the 'unhealthy' channel with any unhealthy devices
@@ -102,11 +83,13 @@ func (r *nvmlResourceManager) checkHealth(stop <-chan interface{}, devices Devic
 	if ret != nvml.SUCCESS {
 		return fmt.Errorf("failed to create event set: %v", ret)
 	}
-	defer eventSet.Free()
+	defer func() {
+		_ = eventSet.Free()
+	}()
 
 	parentToDeviceMap := make(map[string]*Device)
-	deviceIDToGiMap := make(map[string]int)
-	deviceIDToCiMap := make(map[string]int)
+	deviceIDToGiMap := make(map[string]uint32)
+	deviceIDToCiMap := make(map[string]uint32)
 
 	eventMask := uint64(nvml.EventTypeXidCriticalError | nvml.EventTypeDoubleBitEccError | nvml.EventTypeSingleBitEccError)
 	for _, d := range devices {
@@ -129,7 +112,7 @@ func (r *nvmlResourceManager) checkHealth(stop <-chan interface{}, devices Devic
 
 		supportedEvents, ret := gpu.GetSupportedEventTypes()
 		if ret != nvml.SUCCESS {
-			klog.Infof("Unable to determine the supported events for %v: %v; marking it as unhealthy", d.ID, ret)
+			klog.Infof("unable to determine the supported events for %v: %v; marking it as unhealthy", d.ID, ret)
 			unhealthy <- d
 			continue
 		}
@@ -193,7 +176,7 @@ func (r *nvmlResourceManager) checkHealth(stop <-chan interface{}, devices Devic
 		if d.IsMigDevice() && e.GpuInstanceId != 0xFFFFFFFF && e.ComputeInstanceId != 0xFFFFFFFF {
 			gi := deviceIDToGiMap[d.ID]
 			ci := deviceIDToCiMap[d.ID]
-			if !(uint32(gi) == e.GpuInstanceId && uint32(ci) == e.ComputeInstanceId) {
+			if !(gi == e.GpuInstanceId && ci == e.ComputeInstanceId) {
 				continue
 			}
 			klog.Infof("Event for mig device %v (gi=%v, ci=%v)", d.ID, gi, ci)
@@ -232,7 +215,7 @@ func getAdditionalXids(input string) []uint64 {
 // getDevicePlacement returns the placement of the specified device.
 // For a MIG device the placement is defined by the 3-tuple <parent UUID, GI, CI>
 // For a full device the returned 3-tuple is the device's uuid and 0xFFFFFFFF for the other two elements.
-func (r *nvmlResourceManager) getDevicePlacement(d *Device) (string, int, int, error) {
+func (r *nvmlResourceManager) getDevicePlacement(d *Device) (string, uint32, uint32, error) {
 	if !d.IsMigDevice() {
 		return d.GetUUID(), 0xFFFFFFFF, 0xFFFFFFFF, nil
 	}
@@ -240,7 +223,7 @@ func (r *nvmlResourceManager) getDevicePlacement(d *Device) (string, int, int, e
 }
 
 // getMigDeviceParts returns the parent GI and CI ids of the MIG device.
-func (r *nvmlResourceManager) getMigDeviceParts(d *Device) (string, int, int, error) {
+func (r *nvmlResourceManager) getMigDeviceParts(d *Device) (string, uint32, uint32, error) {
 	if !d.IsMigDevice() {
 		return "", 0, 0, fmt.Errorf("cannot get GI and CI of full device")
 	}
@@ -267,13 +250,14 @@ func (r *nvmlResourceManager) getMigDeviceParts(d *Device) (string, int, int, er
 		if ret != nvml.SUCCESS {
 			return "", 0, 0, fmt.Errorf("failed to get Compute Instance ID: %v", ret)
 		}
-		return parentUUID, gi, ci, nil
+		//nolint:gosec  // We know that the values returned from Get*InstanceId are within the valid uint32 range.
+		return parentUUID, uint32(gi), uint32(ci), nil
 	}
 	return parseMigDeviceUUID(uuid)
 }
 
 // parseMigDeviceUUID splits the MIG device UUID into the parent device UUID and ci and gi
-func parseMigDeviceUUID(mig string) (string, int, int, error) {
+func parseMigDeviceUUID(mig string) (string, uint32, uint32, error) {
 	tokens := strings.SplitN(mig, "-", 2)
 	if len(tokens) != 2 || tokens[0] != "MIG" {
 		return "", 0, 0, fmt.Errorf("unable to parse UUID as MIG device")
@@ -284,15 +268,24 @@ func parseMigDeviceUUID(mig string) (string, int, int, error) {
 		return "", 0, 0, fmt.Errorf("unable to parse UUID as MIG device")
 	}
 
-	gi, err := strconv.ParseInt(tokens[1], 10, 32)
+	gi, err := toUint32(tokens[1])
 	if err != nil {
 		return "", 0, 0, fmt.Errorf("unable to parse UUID as MIG device")
 	}
 
-	ci, err := strconv.ParseInt(tokens[2], 10, 32)
+	ci, err := toUint32(tokens[2])
 	if err != nil {
 		return "", 0, 0, fmt.Errorf("unable to parse UUID as MIG device")
 	}
 
-	return tokens[0], int(gi), int(ci), nil
+	return tokens[0], gi, ci, nil
+}
+
+func toUint32(s string) (uint32, error) {
+	u, err := strconv.ParseUint(s, 10, 32)
+	if err != nil {
+		return 0, err
+	}
+	//nolint:gosec  // Since we parse s with a 32-bit size this will not overflow.
+	return uint32(u), nil
 }
diff --git a/pkg/nvidia-plugin/pkg/rm/health_test.go b/pkg/nvidia-plugin/pkg/rm/health_test.go
new file mode 100644
index 000000000..101aadf78
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/rm/health_test.go
@@ -0,0 +1,74 @@
+/**
+# Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package rm
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestGetAdditionalXids(t *testing.T) {
+	testCases := []struct {
+		input    string
+		expected []uint64
+	}{
+		{},
+		{
+			input: ",",
+		},
+		{
+			input: "not-an-int",
+		},
+		{
+			input:    "68",
+			expected: []uint64{68},
+		},
+		{
+			input: "-68",
+		},
+		{
+			input:    "68  ",
+			expected: []uint64{68},
+		},
+		{
+			input:    "68,",
+			expected: []uint64{68},
+		},
+		{
+			input:    ",68",
+			expected: []uint64{68},
+		},
+		{
+			input:    "68,67",
+			expected: []uint64{68, 67},
+		},
+		{
+			input:    "68,not-an-int,67",
+			expected: []uint64{68, 67},
+		},
+	}
+
+	for i, tc := range testCases {
+		t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
+			xids := getAdditionalXids(tc.input)
+
+			require.EqualValues(t, tc.expected, xids)
+		})
+	}
+}
diff --git a/pkg/nvidia-plugin/pkg/rm/helper.go b/pkg/nvidia-plugin/pkg/rm/helper.go
new file mode 100644
index 000000000..580282c59
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/rm/helper.go
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package rm
+
+// int8Slice wraps an []int8 with more functions.
+type int8Slice []int8
+
+// String turns a NUL-terminated int8Slice into a string.
+func (s int8Slice) String() string {
+	var b []byte
+	for _, c := range s {
+		if c == 0 {
+			break
+		}
+		b = append(b, byte(c))
+	}
+	return string(b)
+}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_devices.go b/pkg/nvidia-plugin/pkg/rm/nvml_devices.go
similarity index 65%
rename from pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_devices.go
rename to pkg/nvidia-plugin/pkg/rm/nvml_devices.go
index fe9375b8c..a8c642f0b 100644
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_devices.go
+++ b/pkg/nvidia-plugin/pkg/rm/nvml_devices.go
@@ -1,33 +1,17 @@
 /*
- * SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
  *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 package rm
@@ -39,10 +23,9 @@ import (
 	"strconv"
 	"strings"
 
-	"github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/mig"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
 
-	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
-	"github.com/NVIDIA/go-nvlib/pkg/nvml"
+	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/mig"
 )
 
 const (
@@ -61,16 +44,16 @@ type nvmlMigDevice nvmlDevice
 var _ deviceInfo = (*nvmlDevice)(nil)
 var _ deviceInfo = (*nvmlMigDevice)(nil)
 
-func newGPUDevice(i int, gpu nvml.Device) (string, deviceInfo) {
+func newNvmlGPUDevice(i int, gpu nvml.Device) (string, deviceInfo) {
 	index := fmt.Sprintf("%v", i)
-	isWsl, _ := info.New().HasDXCore()
-	if isWsl {
-		return index, wslDevice{gpu}
-	}
-
 	return index, nvmlDevice{gpu}
 }
 
+func newWslGPUDevice(i int, gpu nvml.Device) (string, deviceInfo) {
+	index := fmt.Sprintf("%v", i)
+	return index, wslDevice{gpu}
+}
+
 func newMigDevice(i int, j int, mig nvml.Device) (string, nvmlMigDevice) {
 	return fmt.Sprintf("%v:%v", i, j), nvmlMigDevice{mig}
 }
@@ -100,6 +83,24 @@ func (d nvmlDevice) GetPaths() ([]string, error) {
 	return []string{path}, nil
 }
 
+// GetComputeCapability returns the CUDA Compute Capability for the device.
+func (d nvmlDevice) GetComputeCapability() (string, error) {
+	major, minor, ret := d.Device.GetCudaComputeCapability()
+	if ret != nvml.SUCCESS {
+		return "", ret
+	}
+	return fmt.Sprintf("%d.%d", major, minor), nil
+}
+
+// GetComputeCapability returns the CUDA Compute Capability for the device.
+func (d nvmlMigDevice) GetComputeCapability() (string, error) {
+	parent, ret := d.Device.GetDeviceHandleFromMigDeviceHandle()
+	if ret != nvml.SUCCESS {
+		return "", fmt.Errorf("failed to get parent device: %w", ret)
+	}
+	return nvmlDevice{parent}.GetComputeCapability()
+}
+
 // GetPaths returns the paths for a MIG device
 func (d nvmlMigDevice) GetPaths() ([]string, error) {
 	capDevicePaths, err := mig.GetMigCapabilityDevicePaths()
@@ -148,13 +149,13 @@ func (d nvmlMigDevice) GetPaths() ([]string, error) {
 
 // GetNumaNode returns the NUMA node associated with the GPU device
 func (d nvmlDevice) GetNumaNode() (bool, int, error) {
-	pciInfo, ret := d.GetPciInfo()
+	info, ret := d.GetPciInfo()
 	if ret != nvml.SUCCESS {
 		return false, 0, fmt.Errorf("error getting PCI Bus Info of device: %v", ret)
 	}
 
 	// Discard leading zeros.
-	busID := strings.ToLower(strings.TrimPrefix(int8Slice(pciInfo.BusId[:]).String(), "0000"))
+	busID := strings.ToLower(strings.TrimPrefix(int8Slice(info.BusId[:]).String(), "0000"))
 
 	b, err := os.ReadFile(fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", busID))
 	if err != nil {
@@ -182,3 +183,21 @@ func (d nvmlMigDevice) GetNumaNode() (bool, int, error) {
 
 	return nvmlDevice{parent}.GetNumaNode()
 }
+
+// GetTotalMemory returns the total memory available on the device.
+func (d nvmlDevice) GetTotalMemory() (uint64, error) {
+	info, ret := d.Device.GetMemoryInfo()
+	if ret != nvml.SUCCESS {
+		return 0, ret
+	}
+	return info.Total, nil
+}
+
+// GetTotalMemory returns the total memory available on the device.
+func (d nvmlMigDevice) GetTotalMemory() (uint64, error) {
+	info, ret := d.Device.GetMemoryInfo()
+	if ret != nvml.SUCCESS {
+		return 0, ret
+	}
+	return info.Total, nil
+}
diff --git a/pkg/nvidia-plugin/pkg/rm/nvml_manager.go b/pkg/nvidia-plugin/pkg/rm/nvml_manager.go
new file mode 100644
index 000000000..e071620a2
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/rm/nvml_manager.go
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package rm
+
+import (
+	"fmt"
+
+	"github.com/NVIDIA/go-gpuallocator/gpuallocator"
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+	"k8s.io/klog/v2"
+)
+
+type nvmlResourceManager struct {
+	resourceManager
+	nvml nvml.Interface
+}
+
+var _ ResourceManager = (*nvmlResourceManager)(nil)
+
+// NewNVMLResourceManagers returns a set of ResourceManagers, one for each NVML resource in 'config'.
+func NewNVMLResourceManagers(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, config *nvidia.DeviceConfig) ([]ResourceManager, error) {
+	ret := nvmllib.Init()
+	if ret != nvml.SUCCESS {
+		return nil, fmt.Errorf("failed to initialize NVML: %v", ret)
+	}
+	defer func() {
+		ret := nvmllib.Shutdown()
+		if ret != nvml.SUCCESS {
+			klog.Infof("Error shutting down NVML: %v", ret)
+		}
+	}()
+
+	deviceMap, err := NewDeviceMap(infolib, devicelib, config)
+	if err != nil {
+		return nil, fmt.Errorf("error building device map: %v", err)
+	}
+
+	var rms []ResourceManager
+	for resourceName, devices := range deviceMap {
+		if len(devices) == 0 {
+			continue
+		}
+		r := &nvmlResourceManager{
+			resourceManager: resourceManager{
+				config:   config,
+				resource: resourceName,
+				devices:  devices,
+			},
+			nvml: nvmllib,
+		}
+		rms = append(rms, r)
+	}
+
+	return rms, nil
+}
+
+// GetPreferredAllocation runs an allocation algorithm over the inputs.
+// The algorithm chosen is based both on the incoming set of available devices and various config settings.
+func (r *nvmlResourceManager) GetPreferredAllocation(available, required []string, size int) ([]string, error) {
+	return r.getPreferredAllocation(available, required, size)
+}
+
+// GetDevicePaths returns the required and optional device nodes for the requested resources
+func (r *nvmlResourceManager) GetDevicePaths(ids []string) []string {
+	paths := []string{
+		"/dev/nvidiactl",
+		"/dev/nvidia-uvm",
+		"/dev/nvidia-uvm-tools",
+		"/dev/nvidia-modeset",
+	}
+
+	return append(paths, r.Devices().Subset(ids).GetPaths()...)
+}
+
+// CheckHealth performs health checks on a set of devices, writing to the 'unhealthy' channel with any unhealthy devices
+func (r *nvmlResourceManager) CheckHealth(stop <-chan interface{}, unhealthy chan<- *Device) error {
+	return r.checkHealth(stop, r.devices, unhealthy)
+}
+
+// getPreferredAllocation runs an allocation algorithm over the inputs.
+// The algorithm chosen is based both on the incoming set of available devices and various config settings.
+func (r *nvmlResourceManager) getPreferredAllocation(available, required []string, size int) ([]string, error) {
+	// If all of the available devices are full GPUs without replicas, then
+	// calculate an aligned allocation across those devices.
+	if r.Devices().AlignedAllocationSupported() && !AnnotatedIDs(available).AnyHasAnnotations() {
+		return r.alignedAlloc(available, required, size)
+	}
+
+	// Otherwise, distribute them evenly across all replicated GPUs
+	return r.distributedAlloc(available, required, size)
+}
+
+// alignedAlloc uses gpuallocator's best-effort policy, restricted to the
+// available and required devices, to calculate the preferred allocation.
+func (r *nvmlResourceManager) alignedAlloc(available, required []string, size int) ([]string, error) {
+	var devices []string
+
+	linkedDevices, err := gpuallocator.NewDevices(
+		gpuallocator.WithNvmlLib(r.nvml),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("unable to get device link information: %w", err)
+	}
+
+	availableDevices, err := linkedDevices.Filter(available)
+	if err != nil {
+		return nil, fmt.Errorf("unable to retrieve list of available devices: %v", err)
+	}
+
+	requiredDevices, err := linkedDevices.Filter(required)
+	if err != nil {
+		return nil, fmt.Errorf("unable to retrieve list of required devices: %v", err)
+	}
+
+	allocatedDevices := gpuallocator.NewBestEffortPolicy().Allocate(availableDevices, requiredDevices, size)
+	for _, device := range allocatedDevices {
+		devices = append(devices, device.UUID)
+	}
+
+	return devices, nil
+}
diff --git a/pkg/nvidia-plugin/pkg/rm/rm.go b/pkg/nvidia-plugin/pkg/rm/rm.go
new file mode 100644
index 000000000..5267b60b1
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/rm/rm.go
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package rm
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
+	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+	"k8s.io/klog/v2"
+
+	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+)
+
+// resourceManager is the base type shared by the concrete resource manager implementations; it backs Resource, Devices and ValidateRequest.
+type resourceManager struct {
+	config   *nvidia.DeviceConfig // plugin configuration; ValidateRequest reads its Sharing settings
+	resource spec.ResourceName    // value returned by Resource()
+	devices  Devices              // value returned by Devices(); also the set of IDs ValidateRequest accepts
+}
+
+// ResourceManager is the interface for a manager of one resource: it exposes the resource name and its Devices, plus path lookup, preferred allocation, health checking and request validation.
+type ResourceManager interface {
+	Resource() spec.ResourceName
+	Devices() Devices
+	GetDevicePaths([]string) []string
+	GetPreferredAllocation(available, required []string, size int) ([]string, error)
+	CheckHealth(stop <-chan interface{}, unhealthy chan<- *Device) error
+	ValidateRequest(AnnotatedIDs) error
+}
+
+// Resource returns the resource name stored on the manager (its resource field).
+func (r *resourceManager) Resource() spec.ResourceName {
+	return r.resource
+}
+
+// Devices returns the manager's devices field as-is (no copy is made).
+func (r *resourceManager) Devices() Devices {
+	return r.devices
+}
+
+var errInvalidRequest = errors.New("invalid request") // sentinel wrapped with %w by every ValidateRequest failure, so callers can match it with errors.Is
+
+// ValidateRequest checks the requested IDs against the devices and sharing configuration of the manager.
+// It returns nil when the request is acceptable; otherwise it returns an error wrapping errInvalidRequest,
+// either because an ID is unknown or because too many shared (annotated) devices were requested.
+func (r *resourceManager) ValidateRequest(ids AnnotatedIDs) error {
+	// Assert that all requested IDs are known to the resource manager
+	for _, id := range ids {
+		if !r.devices.Contains(id) {
+			return fmt.Errorf("%w: unknown device: %s", errInvalidRequest, id)
+		}
+	}
+
+	// If the devices being allocated are replicas, then (conditionally)
+	// error out if more than one resource is being allocated.
+	includesReplicas := ids.AnyHasAnnotations()
+	numRequestedDevices := len(ids)
+	switch r.config.Sharing.SharingStrategy() { // any other strategy falls through the switch and is accepted
+	case spec.SharingStrategyTimeSlicing:
+		if includesReplicas && numRequestedDevices > 1 && r.config.Sharing.ReplicatedResources().FailRequestsGreaterThanOne {
+			return fmt.Errorf("%w: maximum request size for shared resources is 1; found %d", errInvalidRequest, numRequestedDevices)
+		}
+	case spec.SharingStrategyMPS:
+		// For MPS sharing, we explicitly ignore the FailRequestsGreaterThanOne
+		// value in the sharing settings.
+		// This setting was added to timeslicing after the initial release and
+		// is set to `false` to maintain backward compatibility with existing
+		// deployments. If we do extend MPS to allow multiple devices to be
+		// requested, the MPS API will be extended separately from the
+		// time-slicing API.
+		if includesReplicas && numRequestedDevices > 1 {
+			return fmt.Errorf("%w: maximum request size for shared resources is 1; found %d", errInvalidRequest, numRequestedDevices)
+		}
+	}
+	return nil
+}
+
+// AddDefaultResourcesToConfig adds default resource matching rules to config.Resources: a "gpu" rule for every GPU, plus MIG rules chosen by config.Flags.MigStrategy.
+func AddDefaultResourcesToConfig(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, config *spec.Config) error {
+	_ = config.Resources.AddGPUResource("*", "gpu") // NOTE(review): the returned error is discarded — confirm this is intentional
+	if config.Flags.MigStrategy == nil {
+		return nil
+	}
+	switch *config.Flags.MigStrategy { // any other strategy value adds no MIG rules
+	case spec.MigStrategySingle:
+		return config.Resources.AddMIGResource("*", "gpu")
+	case spec.MigStrategyMixed:
+		hasNVML, reason := infolib.HasNvml()
+		if !hasNVML {
+			klog.Warningf("mig-strategy=%q is only supported with NVML", spec.MigStrategyMixed)
+			klog.Warningf("NVML not detected: %v", reason)
+			return nil
+		}
+
+		ret := nvmllib.Init()
+		if ret != nvml.SUCCESS {
+			if *config.Flags.FailOnInitError { // assumes FailOnInitError is non-nil — TODO confirm it is always defaulted
+				return fmt.Errorf("failed to initialize NVML: %v", ret)
+			}
+			return nil
+		}
+		defer func() { // runs after VisitMigProfiles below has returned; a failed shutdown is only logged
+			ret := nvmllib.Shutdown()
+			if ret != nvml.SUCCESS {
+				klog.Errorf("Error shutting down NVML: %v", ret)
+			}
+		}()
+
+		return devicelib.VisitMigProfiles(func(p device.MigProfile) error {
+			info := p.GetInfo() // NOTE: this local shadows the imported "info" package inside the closure
+			if info.C != info.G { // profiles whose C and G values differ get no rule (presumably compute vs GPU slice counts — confirm)
+				return nil
+			}
+			resourceName := strings.ReplaceAll("mig-"+p.String(), "+", ".") // "mig-" prefix, with every "+" in the profile name replaced by "."
+			return config.Resources.AddMIGResource(p.String(), resourceName)
+		})
+	}
+	return nil
+}
diff --git a/pkg/nvidia-plugin/pkg/rm/rm_test.go b/pkg/nvidia-plugin/pkg/rm/rm_test.go
new file mode 100644
index 000000000..24fbc8c11
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/rm/rm_test.go
@@ -0,0 +1,195 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package rm
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
+	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
+)
+
+func TestValidateRequest(t *testing.T) { // table-driven: each case builds a resourceManager and checks the error from ValidateRequest
+	testCases := []struct {
+		description       string
+		devices           Devices
+		sharing           spec.Sharing
+		requestDevicesIDs []string
+
+		expectedError error // left nil when the request is expected to validate successfully
+	}{
+		{
+			description: "valid device IDs -- no sharing",
+			devices: Devices{
+				"device0": nil,
+				"device1": nil,
+			},
+			requestDevicesIDs: []string{"device1"},
+		},
+		{
+			description: "invalid device IDs -- no sharing",
+			devices: Devices{
+				"device0": nil,
+				"device1": nil,
+			},
+			requestDevicesIDs: []string{"device1", "device2"},
+			expectedError:     errInvalidRequest,
+		},
+		{
+			description: "timeslicing with single device",
+			sharing: spec.Sharing{
+				TimeSlicing: spec.ReplicatedResources{
+					Resources: []spec.ReplicatedResource{
+						{
+							Name:     "nvidia.com/gpu",
+							Replicas: 2,
+						},
+					},
+				},
+			},
+			devices: Devices{
+				"device0::0": nil,
+				"device0::1": nil,
+				"device1::0": nil,
+				"device1::1": nil,
+			},
+			requestDevicesIDs: []string{"device0::1"},
+		},
+		{
+			description: "timeslicing with two devices",
+			sharing: spec.Sharing{
+				TimeSlicing: spec.ReplicatedResources{
+					Resources: []spec.ReplicatedResource{
+						{
+							Name:     "nvidia.com/gpu",
+							Replicas: 2,
+						},
+					},
+				},
+			},
+			devices: Devices{
+				"device0::0": nil,
+				"device0::1": nil,
+				"device1::0": nil,
+				"device1::1": nil,
+			},
+			requestDevicesIDs: []string{"device0::1", "device1::0"},
+		},
+		{
+			description: "timeslicing with two devices -- failRequestsGreaterThanOne",
+			sharing: spec.Sharing{
+				TimeSlicing: spec.ReplicatedResources{
+					FailRequestsGreaterThanOne: true,
+					Resources: []spec.ReplicatedResource{
+						{
+							Name:     "nvidia.com/gpu",
+							Replicas: 2,
+						},
+					},
+				},
+			},
+			devices: Devices{
+				"device0::0": nil,
+				"device0::1": nil,
+				"device1::0": nil,
+				"device1::1": nil,
+			},
+			requestDevicesIDs: []string{"device0::1", "device1::0"},
+			expectedError:     errInvalidRequest,
+		},
+		{
+			description: "MPS with single device",
+			sharing: spec.Sharing{
+				MPS: &spec.ReplicatedResources{
+					Resources: []spec.ReplicatedResource{
+						{
+							Name:     "nvidia.com/gpu",
+							Replicas: 2,
+						},
+					},
+				},
+			},
+			devices: Devices{
+				"device0::0": nil,
+				"device0::1": nil,
+				"device1::0": nil,
+				"device1::1": nil,
+			},
+			requestDevicesIDs: []string{"device0::1"},
+		},
+		{
+			description: "MPS with two devices",
+			sharing: spec.Sharing{
+				MPS: &spec.ReplicatedResources{
+					Resources: []spec.ReplicatedResource{
+						{
+							Name:     "nvidia.com/gpu",
+							Replicas: 2,
+						},
+					},
+				},
+			},
+			devices: Devices{
+				"device0::0": nil,
+				"device0::1": nil,
+				"device1::0": nil,
+				"device1::1": nil,
+			},
+			requestDevicesIDs: []string{"device0::1", "device1::0"},
+			expectedError:     errInvalidRequest,
+		},
+		{
+			description: "MPS with two devices -- failRequestsGreaterThanOne",
+			sharing: spec.Sharing{
+				MPS: &spec.ReplicatedResources{
+					FailRequestsGreaterThanOne: true,
+					Resources: []spec.ReplicatedResource{
+						{
+							Name:     "nvidia.com/gpu",
+							Replicas: 2,
+						},
+					},
+				},
+			},
+			devices: Devices{
+				"device0::0": nil,
+				"device0::1": nil,
+				"device1::0": nil,
+				"device1::1": nil,
+			},
+			requestDevicesIDs: []string{"device0::1", "device1::0"},
+			expectedError:     errInvalidRequest,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			r := resourceManager{
+				config: &nvidia.DeviceConfig{
+					Config: &spec.Config{
+						Sharing: tc.sharing,
+					},
+				},
+				devices: tc.devices,
+			}
+			err := r.ValidateRequest(tc.requestDevicesIDs)
+			require.ErrorIs(t, err, tc.expectedError) // only the wrapped sentinel is compared, not the message text
+		})
+	}
+}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/tegra_devices.go b/pkg/nvidia-plugin/pkg/rm/tegra_devices.go
similarity index 53%
rename from pkg/device-plugin/nvidiadevice/nvinternal/rm/tegra_devices.go
rename to pkg/nvidia-plugin/pkg/rm/tegra_devices.go
index 4e824d378..44d72ec72 100644
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/tegra_devices.go
+++ b/pkg/nvidia-plugin/pkg/rm/tegra_devices.go
@@ -1,34 +1,18 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
 
 package rm
 
@@ -83,3 +67,13 @@ func (d *tegraDevice) GetPaths() ([]string, error) {
 func (d *tegraDevice) GetNumaNode() (bool, int, error) {
 	return false, -1, nil
 }
+
+// GetTotalMemory is unsupported for a Tegra device; it always returns 0 and a nil error.
+func (d *tegraDevice) GetTotalMemory() (uint64, error) {
+	return 0, nil
+}
+
+// GetComputeCapability is unimplemented for a Tegra device; it always returns the placeholder "0.0" and a nil error.
+func (d *tegraDevice) GetComputeCapability() (string, error) {
+	return "0.0", nil
+}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/tegra_manager.go b/pkg/nvidia-plugin/pkg/rm/tegra_manager.go
similarity index 55%
rename from pkg/device-plugin/nvidiadevice/nvinternal/rm/tegra_manager.go
rename to pkg/nvidia-plugin/pkg/rm/tegra_manager.go
index 5350ac03a..b3ae4d863 100644
--- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/tegra_manager.go
+++ b/pkg/nvidia-plugin/pkg/rm/tegra_manager.go
@@ -1,34 +1,18 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The HAMi Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to NVIDIA CORPORATION under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. NVIDIA CORPORATION licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright The HAMi Authors. See
- * GitHub history for details.
- */
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
 
 package rm
 
@@ -51,9 +35,9 @@ func NewTegraResourceManagers(config *nvidia.DeviceConfig) ([]ResourceManager, e
 		return nil, fmt.Errorf("error building Tegra device map: %v", err)
 	}
 
-	deviceMap, err = updateDeviceMapWithReplicas(config, deviceMap)
+	deviceMap, err = updateDeviceMapWithReplicas(config.Sharing.ReplicatedResources(), deviceMap)
 	if err != nil {
-		return nil, fmt.Errorf("error updating device map with replicas from config.sharing.timeSlicing.resources: %v", err)
+		return nil, fmt.Errorf("error updating device map with replicas from sharing resources: %v", err)
 	}
 
 	var rms []ResourceManager
diff --git a/pkg/nvidia-plugin/pkg/rm/wsl_devices.go b/pkg/nvidia-plugin/pkg/rm/wsl_devices.go
new file mode 100644
index 000000000..b8319409f
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/rm/wsl_devices.go
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package rm
+
+type wslDevice nvmlDevice // same underlying type as nvmlDevice; the methods below convert back to reuse its behavior
+
+var _ deviceInfo = (*wslDevice)(nil) // compile-time check that wslDevice satisfies deviceInfo
+
+// GetUUID returns the UUID of the device, as reported by the underlying nvmlDevice.
+func (d wslDevice) GetUUID() (string, error) {
+	return nvmlDevice(d).GetUUID()
+}
+
+// GetPaths returns the device paths for a WSL device: always the single node "/dev/dxg".
+func (d wslDevice) GetPaths() ([]string, error) {
+	return []string{"/dev/dxg"}, nil
+}
+
+// GetNumaNode returns the NUMA node associated with the GPU device, as reported by the underlying nvmlDevice.
+func (d wslDevice) GetNumaNode() (bool, int, error) {
+	return nvmlDevice(d).GetNumaNode()
+}
+
+// GetTotalMemory returns the total memory available on the device, as reported by the underlying nvmlDevice.
+func (d wslDevice) GetTotalMemory() (uint64, error) {
+	return nvmlDevice(d).GetTotalMemory()
+}
+
+// GetComputeCapability returns the CUDA compute capability for the device, as reported by the underlying nvmlDevice.
+func (d wslDevice) GetComputeCapability() (string, error) {
+	return nvmlDevice(d).GetComputeCapability()
+}
diff --git a/pkg/nvidia-plugin/pkg/vgpu/pciutil.go b/pkg/nvidia-plugin/pkg/vgpu/pciutil.go
new file mode 100644
index 000000000..ea1664961
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/vgpu/pciutil.go
@@ -0,0 +1,204 @@
+/**
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package vgpu
+
+import (
+	"fmt"
+	"os"
+	"path"
+	"strings"
+)
+
+// NvidiaPCI is the interface used to list NVIDIA PCI devices; NvidiaPCILib and MockNvidiaPCI both implement it.
+type NvidiaPCI interface {
+	Devices() ([]*PCIDevice, error)
+}
+
+// PCIDevice represents a single PCI device; the field notes describe how NvidiaPCILib.Devices fills it from sysfs.
+type PCIDevice struct {
+	Path    string // device directory under PciDevicesRoot
+	Address string // name of that directory
+	Class   string // first four characters of the "class" file
+	Vendor  string // whitespace-trimmed contents of the "vendor" file
+	Config  []byte // raw contents of the "config" file (PCI configuration space)
+}
+
+const (
+	// PciDevicesRoot is the sysfs directory that holds one entry per PCI device
+	PciDevicesRoot = "/sys/bus/pci/devices"
+	// PciStatusByte is the configuration-space offset of the status byte tested for capability-list support
+	PciStatusByte = 0x06
+	// PciStatusCapabilityList is the bit mask in the status byte that indicates a capability list is present
+	PciStatusCapabilityList = 0x10
+	// PciCapabilityList is the configuration-space offset holding the position of the first capability entry
+	PciCapabilityList = 0x34
+	// PciCapabilityListID is the offset, within a capability entry, of the capability id
+	PciCapabilityListID = 0
+	// PciCapabilityListNext is the offset, within a capability entry, of the position of the next entry
+	PciCapabilityListNext = 1
+	// PciCapabilityLength is the offset, within a capability entry, of the capability length byte
+	PciCapabilityLength = 2
+	// PciCapabilityVendorSpecificID is the capability id of a vendor specific capability
+	PciCapabilityVendorSpecificID = 0x09
+	// PciNvidiaVendorID is the PCI vendor id for Nvidia, in the textual form compared against the sysfs "vendor" file
+	PciNvidiaVendorID = "0x10de"
+)
+
+// NvidiaPCILib implements the NvidiaPCI interface by reading sysfs; it holds no state.
+type NvidiaPCILib struct{}
+
+// NewNvidiaPCILib returns a new NvidiaPCILib as an NvidiaPCI.
+func NewNvidiaPCILib() NvidiaPCI {
+	return &NvidiaPCILib{}
+}
+
+// Devices returns every PCI device under PciDevicesRoot whose vendor id is PciNvidiaVendorID; any read failure aborts the scan with a wrapped error.
+func (p *NvidiaPCILib) Devices() ([]*PCIDevice, error) {
+	deviceDirs, err := os.ReadDir(PciDevicesRoot)
+	if err != nil {
+		return nil, fmt.Errorf("unable to read PCI bus devices: %w", err)
+	}
+
+	var devices []*PCIDevice
+	for _, deviceDir := range deviceDirs {
+		address := deviceDir.Name()
+		devicePath := path.Join(PciDevicesRoot, address)
+
+		vendor, err := os.ReadFile(path.Join(devicePath, "vendor"))
+		if err != nil {
+			return nil, fmt.Errorf("unable to read PCI device vendor id for %s: %w", address, err)
+		}
+		vendorID := strings.TrimSpace(string(vendor)) // trimmed once, reused for the filter and the result
+		if vendorID != PciNvidiaVendorID {
+			continue
+		}
+
+		class, err := os.ReadFile(path.Join(devicePath, "class"))
+		if err != nil {
+			return nil, fmt.Errorf("unable to read PCI device class for %s: %w", address, err)
+		}
+		if len(class) < 4 { // guard the 4-byte prefix taken below, which would otherwise panic
+			return nil, fmt.Errorf("unexpected PCI device class %q for %s", class, address)
+		}
+
+		config, err := os.ReadFile(path.Join(devicePath, "config"))
+		if err != nil {
+			return nil, fmt.Errorf("unable to read PCI configuration space for %s: %w", address, err)
+		}
+		devices = append(devices, &PCIDevice{
+			Path:    devicePath,
+			Address: address,
+			Vendor:  vendorID,
+			Class:   string(class[:4]),
+			Config:  config,
+		})
+	}
+
+	return devices, nil
+}
+
+// GetVendorSpecificCapability returns the first vendor specific capability (id PciCapabilityVendorSpecificID)
+// found in the configuration space, as a sub-slice of d.Config. It returns nil, nil when the device has no
+// capability list or no such entry, and an error when Config is too short or the entry overruns it.
+func (d *PCIDevice) GetVendorSpecificCapability() ([]byte, error) {
+	if len(d.Config) < 256 {
+		return nil, fmt.Errorf("entire PCI configuration is not read for device %s. Please run GFD with privileged mode to read complete PCI configuration data", d.Address)
+	}
+
+	if d.Config[PciStatusByte]&PciStatusCapabilityList == 0 {
+		return nil, nil
+	}
+
+	// Offsets are widened to int: with uint8 arithmetic pos+length (and pos+1, pos+2) could wrap
+	// around, producing an inverted slice range (panic) or reads from the wrong bytes.
+	var visited [256]bool
+	pos := int(d.Config[PciCapabilityList])
+	for pos != 0 && !visited[pos] { // stop at the list terminator or when the chain loops
+		id := d.Config[pos+PciCapabilityListID]
+		if id == 0xff || pos+PciCapabilityLength >= len(d.Config) {
+			// chain broken: unset entry, or an entry header that would run past the buffer
+			break
+		}
+		if id == PciCapabilityVendorSpecificID {
+			end := pos + int(d.Config[pos+PciCapabilityLength])
+			if end > len(d.Config) {
+				return nil, fmt.Errorf("vendor specific capability at offset %#x overruns PCI configuration space for device %s", pos, d.Address)
+			}
+			return d.Config[pos:end], nil
+		}
+
+		visited[pos] = true
+		pos = int(d.Config[pos+PciCapabilityListNext])
+	}
+
+	return nil, nil
+}
+
+// GetByte returns the byte at pos; it panics if buffer has pos or fewer bytes. pos is a uint8, so pos+N expressions at call sites wrap at 256.
+func GetByte(buffer []byte, pos uint8) uint8 {
+	return buffer[pos]
+}
+
+// GetWord returns the 2 bytes at pos and pos+1 as a little-endian uint16; it panics if pos+1 is out of range.
+func GetWord(buffer []byte, pos int) uint16 {
+	return uint16(buffer[pos]) | (uint16(buffer[pos+1]) << 8)
+}
+
+// GetLong returns the 4 bytes at pos..pos+3 as a little-endian uint32; it panics if pos+3 is out of range.
+func GetLong(buffer []byte, pos int) uint32 {
+	return uint32(buffer[pos]) |
+		uint32(buffer[pos+1])<<8 |
+		uint32(buffer[pos+2])<<16 |
+		uint32(buffer[pos+3])<<24
+}
+
+// MockNvidiaPCI is a mock implementation of the NvidiaPCI interface backed by a fixed device list.
+type MockNvidiaPCI struct {
+	devices []*PCIDevice
+}
+
+// Devices returns the stored mock devices; the error is always nil.
+func (p *MockNvidiaPCI) Devices() ([]*PCIDevice, error) {
+	return p.devices, nil
+}
+
+// NewMockNvidiaPCI initializes and returns mock PCI interface type
+func NewMockNvidiaPCI() NvidiaPCI {
+	var (
+		gpuPassThroughConfig = []byte{0xde, 0x10, 0x8a, 0x11, 0x07, 0x04, 0x10, 0x00, 0xa1, 0x00, 0x00, 0x03, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0xec, 0x0c, 0x00, 0x00, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0xea, 0x00, 0x00, 0x00, 0x00, 0x01, 0xc1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0x10, 0x14, 0x10, 0x00, 0x00, 0x00, 0xee, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x01, 0x00, 0x00, 0xde, 0x10, 0x14, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xce, 0xd6, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x68, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, 0x05, 0x78, 0x81, 0x00, 0x00, 0x70, 0xe6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x10, 0xb4, 0x02, 0x00, 0xe1, 0x8d, 0x64, 0x00, 0x10, 0x29, 0x00, 0x00, 0x03, 0x3d, 0x45, 0x10, 0x00, 0x00, 0x01, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x03, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x14, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
+		vgpuConfig           = []byte{0xde, 0x10, 0xb8, 0x1e, 0x02, 0x05, 0xff, 0x06, 0xa1, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x0c, 0x00, 0x00, 0xd0, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0xfa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0x10, 0x0f, 0x13, 0x00, 0x00, 0x00, 0x00, 0xd0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xce, 0xd6, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x81, 0x00, 0x00, 0x00, 0xe0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x68, 0x1b, 0x56, 0x46, 0x00, 0x16, 0x34, 0x36, 0x30, 0x2e, 0x31, 0x36, 0x00, 0x00, 0x00, 0x00, 0x72, 0x34, 0x36, 0x30, 0x5f, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
+	)
+
+	return &MockNvidiaPCI{
+		devices: []*PCIDevice{
+			{
+				Path:    "",
+				Address: "passthrough",
+				Vendor:  "0x10de",
+				Class:   "300",
+				Config:  gpuPassThroughConfig,
+			},
+			{
+				Path:    "",
+				Address: "vgpu",
+				Vendor:  "0x10de",
+				Class:   "300",
+				Config:  vgpuConfig,
+			},
+		},
+	}
+}
diff --git a/pkg/nvidia-plugin/pkg/vgpu/pciutil_test.go b/pkg/nvidia-plugin/pkg/vgpu/pciutil_test.go
new file mode 100644
index 000000000..eaacff609
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/vgpu/pciutil_test.go
@@ -0,0 +1,42 @@
+/**
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package vgpu
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestGetVendorSpecificCapability(t *testing.T) { // walks both mock devices and checks the capability each one yields
+	devices, _ := NewMockNvidiaPCI().Devices()
+	for _, device := range devices {
+		// check for vendor id
+		require.Equal(t, "0x10de", fmt.Sprintf("0x%x", GetWord(device.Config, 0)), "Nvidia PCI Vendor ID")
+		// check for vendor specific capability
+		capability, err := device.GetVendorSpecificCapability()
+		require.NoError(t, err, "Get vendor specific capability from configuration space")
+		require.NotZero(t, len(capability), "Vendor capability record")
+		if device.Address == "passthrough" { // mock devices are told apart by their Address
+			require.Equal(t, 20, len(capability), "Vendor capability length for passthrough device")
+		}
+		if device.Address == "vgpu" {
+			require.Equal(t, 27, len(capability), "Vendor capability length for vgpu device")
+		}
+	}
+}
diff --git a/pkg/nvidia-plugin/pkg/vgpu/vgpu.go b/pkg/nvidia-plugin/pkg/vgpu/vgpu.go
new file mode 100644
index 000000000..828e2cb7e
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/vgpu/vgpu.go
@@ -0,0 +1,153 @@
+/**
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package vgpu
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Interface is the interface used to list vGPU specific PCI devices; Lib implements it.
+type Interface interface {
+	Devices() ([]*Device, error)
+}
+
+// Device wraps a PCIDevice together with the vendor specific capability bytes that identified it as a vGPU (it is a struct, not a type alias).
+type Device struct {
+	pci            *PCIDevice // the underlying PCI device
+	vGPUCapability []byte     // vendor specific capability record; parsed by GetInfo
+}
+
+// Info holds the vGPU host driver version and branch that GetInfo decodes from a device's capability record.
+type Info struct {
+	HostDriverVersion string
+	HostDriverBranch  string
+}
+
+const (
+	// VGPUCapabilityRecordStart is the offset, within the vendor specific capability, of the first vGPU capability record
+	VGPUCapabilityRecordStart uint8 = 5
+	// HostDriverVersionLength is the fixed number of bytes that hold the driver version in the host driver version record
+	HostDriverVersionLength = 10
+	// HostDriverBranchLength is the fixed number of bytes that hold the driver branch, directly after the version
+	HostDriverBranchLength = 10
+)
+
+// Lib implements Interface on top of an NvidiaPCI device source.
+type Lib struct {
+	pci NvidiaPCI
+}
+
+// NewVGPULib returns a Lib, as an Interface, that reads its devices from the given NvidiaPCI.
+func NewVGPULib(pci NvidiaPCI) Interface {
+	return &Lib{pci: pci}
+}
+
+// NewMockVGPU returns an Interface backed by the mock PCI devices from NewMockNvidiaPCI (the concrete type is *Lib).
+func NewMockVGPU() Interface {
+	return NewVGPULib(NewMockNvidiaPCI())
+}
+
+// Devices returns the PCI devices whose vendor specific capability carries the vGPU signature; any lookup error aborts the whole listing.
+func (v *Lib) Devices() ([]*Device, error) {
+	pciDevices, err := v.pci.Devices()
+	if err != nil {
+		return nil, fmt.Errorf("error getting NVIDIA specific PCI devices: %v", err)
+	}
+
+	var vgpus []*Device
+	for _, device := range pciDevices {
+		capability, err := device.GetVendorSpecificCapability()
+		if err != nil {
+			return nil, fmt.Errorf("unable to read vendor specific capability for %s: %v", device.Address, err)
+		}
+		if capability == nil { // no capability list or no vendor specific entry: not a vGPU
+			continue
+		}
+		if exists := v.IsVGPUDevice(capability); exists {
+			vgpu := &Device{
+				pci:            device,
+				vGPUCapability: capability,
+			}
+			vgpus = append(vgpus, vgpu)
+		}
+	}
+	return vgpus, nil // stays nil when no vGPU device was found
+}
+
+// IsVGPUDevice reports whether the capability holds at least 5 bytes with the vGPU signature "VF" at offsets 3 and 4.
+func (v *Lib) IsVGPUDevice(capability []byte) bool {
+	if len(capability) < 5 { // too short to contain the signature bytes checked below
+		return false
+	}
+	// check for vGPU signature, 0x56, 0x46 i.e "VF"
+	if capability[3] != 0x56 {
+		return false
+	}
+	if capability[4] != 0x46 {
+		return false
+	}
+	return true
+}
+
+// GetInfo returns information about vGPU manager running on the underlying hypervisor host.
+// It walks the capability records from VGPUCapabilityRecordStart to the host driver version record (id 0)
+// and decodes its version and branch fields; a missing, truncated or corrupt record yields an error.
+func (d *Device) GetInfo() (*Info, error) {
+	if len(d.vGPUCapability) == 0 {
+		return nil, fmt.Errorf("vendor capability record is not populated for device %s", d.pci.Address)
+	}
+
+	// Offsets are kept as int and every read is bounds-checked: uint8 arithmetic could wrap
+	// around and unchecked indexing could panic on a short or malformed capability.
+	capability := d.vGPUCapability
+	const recordHeaderLength = 2 // 1 (record id byte) + 1 (record length byte)
+
+	// traverse vGPU vendor capability records until host driver version record(id: 0) is found
+	pos := int(VGPUCapabilityRecordStart)
+	for pos+1 < len(capability) && capability[pos] != 0 {
+		recordLength := int(capability[pos+1])
+		if recordLength == 0 {
+			// a zero-length record would never advance; stop instead of looping forever
+			break
+		}
+		pos += recordLength
+	}
+
+	versionStart := pos + recordHeaderLength
+	branchStart := versionStart + HostDriverVersionLength
+	end := branchStart + HostDriverBranchLength
+	if pos >= len(capability) || capability[pos] != 0 || end > len(capability) {
+		return nil, fmt.Errorf("cannot find driver version record in vendor specific capability for device %s", d.pci.Address)
+	}
+
+	// field decodes a fixed-width, NUL-padded byte range, one character per byte.
+	field := func(from, to int) string {
+		var sb strings.Builder
+		for _, b := range capability[from:to] {
+			sb.WriteRune(rune(b))
+		}
+		return strings.Trim(sb.String(), "\x00")
+	}
+
+	info := &Info{
+		HostDriverVersion: field(versionStart, branchStart), // 10 bytes of driver version
+		HostDriverBranch:  field(branchStart, end),          // 10 bytes of driver branch
+	}
+
+	return info, nil
+}
diff --git a/pkg/nvidia-plugin/pkg/vgpu/vgpu_test.go b/pkg/nvidia-plugin/pkg/vgpu/vgpu_test.go
new file mode 100644
index 000000000..1d8cd5e44
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/vgpu/vgpu_test.go
@@ -0,0 +1,74 @@
+/**
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package vgpu
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// MockVGPU is a mock of the VGPU interface that serves the devices stored in its devices field.
+type MockVGPU struct {
+	devices []*Device
+}
+
+// Devices returns the mocked device list held by the receiver; the returned error is always nil.
+func (p *MockVGPU) Devices() ([]*Device, error) {
+	return p.devices, nil
+}
+
+// TestIsVGPUDevice checks the vendor ID and the vendor-specific capability of every mock PCI device.
+func TestIsVGPUDevice(t *testing.T) {
+	mockVGPU := NewMockVGPU().(*Lib)
+	devices, err := mockVGPU.pci.Devices()
+	require.NoError(t, err, "Get mock PCI devices")
+	for _, device := range devices {
+		require.Equal(t, "0x10de", fmt.Sprintf("0x%x", GetWord(device.Config, 0)), "Nvidia PCI Vendor ID")
+		capability, err := device.GetVendorSpecificCapability()
+		require.NoError(t, err, "Get vendor capabilities from configuration space")
+		require.NotEmpty(t, capability, "Vendor capability record")
+		switch device.Address {
+		case "passthrough":
+			require.False(t, mockVGPU.IsVGPUDevice(capability), "Is not a virtual GPU device")
+			require.Len(t, capability, 20, "Vendor capability length for passthrough device")
+		case "vgpu":
+			require.Len(t, capability, 27, "Vendor capability length for vgpu device")
+			require.Equal(t, uint8(9), GetByte(capability, 0), "Vendor capability ID")
+		}
+	}
+}
+
+// TestVGPUGetInfo checks that GetInfo decodes the host driver version and branch of the mock "vgpu" device.
+func TestVGPUGetInfo(t *testing.T) {
+	devices, err := NewMockVGPU().Devices()
+	require.NoError(t, err, "Get mock vGPU devices")
+	for _, device := range devices {
+		if device.pci.Address == "vgpu" {
+			require.NotEmpty(t, device.pci.Config, "Device Configuration data")
+			require.Len(t, device.pci.Config, 256, "Device configuration data length")
+			require.NotEmpty(t, device.vGPUCapability, "Vendor capability record")
+			require.Equal(t, uint8(9), device.vGPUCapability[0], "Vendor capability id")
+			info, err := device.GetInfo()
+			require.NoError(t, err, "Get host driver version and branch")
+			require.NotNil(t, info, "Host driver info")
+			require.Equal(t, "460.16", info.HostDriverVersion, "Host driver version")
+			require.Equal(t, "r460_00", info.HostDriverBranch, "Host driver branch")
+		}
+	}
+}
diff --git a/pkg/nvidia-plugin/pkg/watch/watchers.go b/pkg/nvidia-plugin/pkg/watch/watchers.go
new file mode 100644
index 000000000..06ccceac7
--- /dev/null
+++ b/pkg/nvidia-plugin/pkg/watch/watchers.go
@@ -0,0 +1,49 @@
+/*
+# Copyright NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/
+package watch
+
+import (
+	"os"
+	"os/signal"
+
+	"github.com/fsnotify/fsnotify"
+)
+
+// Files returns an fsnotify watcher that watches every path in files.
+// If a path cannot be added, the watcher is closed and that error is returned.
+func Files(files ...string) (*fsnotify.Watcher, error) {
+	w, err := fsnotify.NewWatcher()
+	if err != nil {
+		return nil, err
+	}
+
+	for i := range files {
+		if addErr := w.Add(files[i]); addErr != nil {
+			w.Close()
+			return nil, addErr
+		}
+	}
+
+	return w, nil
+}
+
+// Signals creates a channel that is notified of the specified signals.
+func Signals(sigs ...os.Signal) chan os.Signal {
+	// signal.Notify does not block when sending, so the channel needs a buffer to hold a pending signal
+	notifications := make(chan os.Signal, 1)
+	signal.Notify(notifications, sigs...)
+	return notifications
+}
diff --git a/version.mk b/version.mk
index e303f2251..204237baa 100644
--- a/version.mk
+++ b/version.mk
@@ -1,13 +1,21 @@
-GO=go
-GO111MODULE=on
-CMDS=scheduler vGPUmonitor
-DEVICES=nvidia
-OUTPUT_DIR=bin
-TARGET_ARCH=amd64
-GOLANG_IMAGE=golang:1.22.5-bullseye
-NVIDIA_IMAGE=nvidia/cuda:12.3.2-devel-ubuntu20.04
-DEST_DIR=/usr/local/vgpu/
+# Build configuration
+GO := go
+GO111MODULE := on
+CMDS := scheduler vGPUmonitor
+DEVICES := nvidia
+ARCH := linux-amd64
 
-VERSION = v0.0.1
-IMG_NAME =hami
-IMG_TAG="${IMG_NAME}:${VERSION}"
\ No newline at end of file
+# Path configuration
+OUTPUT_DIR := bin
+TARGET_ARCH := amd64
+DEST_DIR := /usr/local/vgpu
+
+# Base images
+GOLANG_IMAGE := golang:1.22.5-bullseye
+NVIDIA_DEVEL_IMAGE:= nvcr.io/nvidia/cuda:12.6.3-devel-ubuntu22.04
+NVIDIA_IMAGE := nvcr.io/nvidia/cuda:12.6.3-base-ubuntu22.04
+
+# Version control
+VERSION := v0.0.1
+IMG_NAME := hami-device-plugin
+IMG_TAG := ${IMG_NAME}:${VERSION}
\ No newline at end of file

From 3d965e7118352acf0fe892eec08542b1feb0d5a6 Mon Sep 17 00:00:00 2001
From: haitwang-cloud <haitao_wht@outlook.com>
Date: Tue, 1 Apr 2025 10:57:08 +0800
Subject: [PATCH 2/5] feat: enhance GetPluginDevices to support device split
 count

---
 pkg/nvidia-plugin/pkg/plugin/server.go |  2 +-
 pkg/nvidia-plugin/pkg/rm/devices.go    | 21 +++++++++++++++++----
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/pkg/nvidia-plugin/pkg/plugin/server.go b/pkg/nvidia-plugin/pkg/plugin/server.go
index 37643f21a..af8c3843f 100644
--- a/pkg/nvidia-plugin/pkg/plugin/server.go
+++ b/pkg/nvidia-plugin/pkg/plugin/server.go
@@ -659,7 +659,7 @@ func (plugin *NvidiaDevicePlugin) deviceIDsFromAnnotatedDeviceIDs(ids []string)
 }
 
 func (plugin *NvidiaDevicePlugin) apiDevices() []*pluginapi.Device {
-	return plugin.rm.Devices().GetPluginDevices()
+	return plugin.rm.Devices().GetPluginDevices(plugin.schedulerConfig.DeviceSplitCount)
 }
 
 // updateResponseForDeviceListEnvVar sets the environment variable for the requested devices.
diff --git a/pkg/nvidia-plugin/pkg/rm/devices.go b/pkg/nvidia-plugin/pkg/rm/devices.go
index f3b77c5fb..150c9a19b 100644
--- a/pkg/nvidia-plugin/pkg/rm/devices.go
+++ b/pkg/nvidia-plugin/pkg/rm/devices.go
@@ -175,11 +175,24 @@ func (ds Devices) GetUUIDs() []string {
 }
 
 // GetPluginDevices returns the plugin Devices from all devices in the Devices
-func (ds Devices) GetPluginDevices() []*pluginapi.Device {
+func (ds Devices) GetPluginDevices(count uint) []*pluginapi.Device {
 	var res []*pluginapi.Device
-	for _, device := range ds {
-		d := device
-		res = append(res, &d.Device)
+	if len(ds) == 0 { // nothing to advertise; ds.GetIDs()[0] below would panic
+		return res
+	}
+	// Devices whose first listed ID contains "MIG" are advertised unchanged, one plugin device each.
+	if strings.Contains(ds.GetIDs()[0], "MIG") {
+		for _, device := range ds {
+			d := device
+			res = append(res, &d.Device)
+		}
+		return res
+	}
+	// Every other device is advertised count times, with IDs "<device ID>-<index>".
+	for _, dev := range ds {
+		for i := uint(0); i < count; i++ {
+			res = append(res, &pluginapi.Device{ID: fmt.Sprintf("%v-%v", dev.ID, i), Health: dev.Health})
+		}
 	}
 	return res
 }

From 0fc742bcd9cfd27841ee28f06d919befa4026959 Mon Sep 17 00:00:00 2001
From: haitwang-cloud <haitao_wht@outlook.com>
Date: Tue, 1 Apr 2025 16:29:47 +0800
Subject: [PATCH 3/5] refactor: update AddDefaultResourcesToConfig function and
 remove unused MPS files

---
 cmd/device-plugin/nvidia/main.go              |   2 +-
 pkg/nvidia-plugin/mps-control-daemon/main.go  | 255 ----------------
 .../mps-control-daemon/mount/mount-shm.go     | 108 -------
 .../mps-control-daemon/mps/daemon.go          | 280 ------------------
 .../mps-control-daemon/mps/device.go          |  55 ----
 .../mps-control-daemon/mps/device_test.go     | 112 -------
 .../mps-control-daemon/mps/log-tailer.go      |  69 -----
 .../mps-control-daemon/mps/manager.go         | 112 -------
 .../mps-control-daemon/mps/options.go         |  29 --
 .../mps-control-daemon/mps/root.go            |  59 ----
 pkg/nvidia-plugin/pkg/plugin/mps.go           |  91 ------
 pkg/nvidia-plugin/pkg/plugin/server.go        |  47 +--
 pkg/nvidia-plugin/pkg/rm/rm.go                |  14 +-
 13 files changed, 23 insertions(+), 1210 deletions(-)
 delete mode 100644 pkg/nvidia-plugin/mps-control-daemon/main.go
 delete mode 100644 pkg/nvidia-plugin/mps-control-daemon/mount/mount-shm.go
 delete mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/daemon.go
 delete mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/device.go
 delete mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/device_test.go
 delete mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/log-tailer.go
 delete mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/manager.go
 delete mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/options.go
 delete mode 100644 pkg/nvidia-plugin/mps-control-daemon/mps/root.go
 delete mode 100644 pkg/nvidia-plugin/pkg/plugin/mps.go

diff --git a/cmd/device-plugin/nvidia/main.go b/cmd/device-plugin/nvidia/main.go
index cec7923ca..33a9e9071 100644
--- a/cmd/device-plugin/nvidia/main.go
+++ b/cmd/device-plugin/nvidia/main.go
@@ -357,7 +357,7 @@ func startPlugins(c *cli.Context, o *options) ([]plugin.Interface, bool, error)
 
 	// Update the configuration file with default resources.
 	klog.Info("Updating config with default resource matching patterns.")
-	err = rm.AddDefaultResourcesToConfig(infolib, nvmllib, devicelib, devConfig.Config)
+	err = rm.AddDefaultResourcesToConfig(infolib, nvmllib, devicelib, devConfig)
 	if err != nil {
 		return nil, false, fmt.Errorf("unable to add default resources to config: %v", err)
 	}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/main.go b/pkg/nvidia-plugin/mps-control-daemon/main.go
deleted file mode 100644
index 29259c29c..000000000
--- a/pkg/nvidia-plugin/mps-control-daemon/main.go
+++ /dev/null
@@ -1,255 +0,0 @@
-/**
-# Copyright 2024 NVIDIA CORPORATION
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-**/
-
-package main
-
-import (
-	"encoding/json"
-	"errors"
-	"fmt"
-	"os"
-	"syscall"
-	"time"
-
-	"github.com/urfave/cli/v2"
-	"k8s.io/klog/v2"
-
-	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
-	nvinfo "github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
-	"github.com/NVIDIA/go-nvml/pkg/nvml"
-
-	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
-	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/mps-control-daemon/mount"
-	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/mps-control-daemon/mps"
-	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/info"
-	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/logger"
-	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
-	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/watch"
-
-	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
-)
-
-// Config represents a collection of config options for the device plugin.
-type Config struct {
-	configFile string
-
-	// flags stores the CLI flags for later processing.
-	flags []cli.Flag
-}
-
-func main() {
-	config := &Config{}
-
-	c := cli.NewApp()
-	c.Name = "NVIDIA MPS Control Daemon"
-	c.Version = info.GetVersionString()
-	c.Action = func(ctx *cli.Context) error {
-		return start(ctx, config)
-	}
-	c.Commands = []*cli.Command{
-		mount.NewCommand(),
-	}
-
-	config.flags = []cli.Flag{
-		&cli.StringFlag{
-			Name:        "config-file",
-			Usage:       "the path to a config file as an alternative to command line options or environment variables",
-			Destination: &config.configFile,
-			EnvVars:     []string{"CONFIG_FILE"},
-		},
-		&cli.StringFlag{
-			Name:    "mig-strategy",
-			Value:   spec.MigStrategyNone,
-			Usage:   "the desired strategy for exposing MIG devices on GPUs that support it:\n\t\t[none | single | mixed]",
-			EnvVars: []string{"MIG_STRATEGY"},
-		},
-	}
-	c.Flags = config.flags
-
-	klog.InfoS(c.Name, "version", c.Version)
-	err := c.Run(os.Args)
-	if err != nil {
-		klog.Error(err)
-		os.Exit(1)
-	}
-}
-
-// TODO: This needs to do similar validation to the plugin.
-func validateFlags(config *spec.Config) error {
-	return nil
-}
-
-// loadConfig loads the config from the spec file.
-func (cfg *Config) loadConfig(c *cli.Context) (*spec.Config, error) {
-	config, err := spec.NewConfig(c, cfg.flags)
-	if err != nil {
-		return nil, fmt.Errorf("unable to finalize config: %w", err)
-	}
-	err = validateFlags(config)
-	if err != nil {
-		return nil, fmt.Errorf("unable to validate flags: %w", err)
-	}
-	config.Flags.GFD = nil
-
-	return config, nil
-}
-
-// loadConfig loads the config from the spec file.
-func (cfg *Config) loadNvidiaConfig(c *cli.Context) (*nvidia.DeviceConfig, error) {
-	devcfg := &nvidia.DeviceConfig{}
-
-	config, err := spec.NewConfig(c, cfg.flags)
-	if err != nil {
-		return nil, fmt.Errorf("unable to finalize config: %w", err)
-	}
-	err = validateFlags(config)
-	if err != nil {
-		return nil, fmt.Errorf("unable to validate flags: %w", err)
-	}
-	config.Flags.GFD = nil
-	// Set the config in the device config.
-	devcfg.Config = config
-	return devcfg, nil
-}
-
-func start(c *cli.Context, cfg *Config) error {
-	klog.Info("Starting OS watcher.")
-	sigs := watch.Signals(syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
-	var started bool
-	var restartTimeout <-chan time.Time
-	var daemons []*mps.Daemon
-restart:
-	// If we are restarting, stop daemons from previous run.
-	if started {
-		err := stopDaemons(daemons...)
-		if err != nil {
-			return fmt.Errorf("error stopping plugins from previous run: %v", err)
-		}
-	}
-
-	klog.Info("Starting Daemons.")
-	daemons, restartDaemons, err := startDaemons(c, cfg)
-	if err != nil {
-		return fmt.Errorf("error starting plugins: %v", err)
-	}
-	started = true
-
-	if restartDaemons {
-		klog.Infof("Failed to start one or more MPS deamons. Retrying in 30s...")
-		restartTimeout = time.After(30 * time.Second)
-	}
-
-	// Start an infinite loop, waiting for several indicators to either log
-	// some messages, trigger a restart of the plugins, or exit the program.
-	for {
-		select {
-		// If the restart timeout has expired, then restart the plugins
-		case <-restartTimeout:
-			goto restart
-
-		// Watch for any signals from the OS. On SIGHUP, restart this loop,
-		// restarting all of the plugins in the process. On all other
-		// signals, exit the loop and exit the program.
-		case s := <-sigs:
-			switch s {
-			case syscall.SIGHUP:
-				klog.Info("Received SIGHUP, restarting.")
-				goto restart
-			default:
-				klog.Infof("Received signal \"%v\", shutting down.", s)
-				goto exit
-			}
-		}
-	}
-exit:
-	if err := stopDaemons(daemons...); err != nil {
-		return fmt.Errorf("error stopping daemons: %v", err)
-	}
-	return nil
-}
-
-func startDaemons(c *cli.Context, cfg *Config) ([]*mps.Daemon, bool, error) {
-	// Load the configuration file
-	klog.Info("Loading configuration.")
-	config, err := cfg.loadNvidiaConfig(c)
-	if err != nil {
-		return nil, false, fmt.Errorf("unable to load config: %v", err)
-	}
-	spec.DisableResourceNamingInConfig(logger.ToKlog, config.Config)
-
-	nvmllib := nvml.New()
-	devicelib := device.New(nvmllib)
-	infolib := nvinfo.New(
-		nvinfo.WithNvmlLib(nvmllib),
-		nvinfo.WithDeviceLib(devicelib),
-	)
-
-	// Update the configuration file with default resources.
-	klog.Info("Updating config with default resource matching patterns.")
-	err = rm.AddDefaultResourcesToConfig(infolib, nvmllib, devicelib, config.Config)
-	if err != nil {
-		return nil, false, fmt.Errorf("unable to add default resources to config: %v", err)
-	}
-
-	// Print the config to the output.
-	configJSON, err := json.MarshalIndent(config, "", "  ")
-	if err != nil {
-		return nil, false, fmt.Errorf("failed to marshal config to JSON: %v", err)
-	}
-	klog.Infof("\nRunning with config:\n%v", string(configJSON))
-
-	// Get the set of daemons.
-	// Note that a daemon is only created for resources with at least one device.
-	klog.Info("Retrieving MPS daemons.")
-	mpsDaemons, err := mps.NewDaemons(infolib, nvmllib, devicelib,
-		mps.WithConfig(config),
-	)
-	if err != nil {
-		return nil, false, fmt.Errorf("error getting daemons: %v", err)
-	}
-
-	if len(mpsDaemons) == 0 {
-		klog.Info("No devices are configured for MPS sharing; Waiting indefinitely.")
-	}
-
-	// Loop through all MPS daemons and start them.
-	// If any daemon fails to start, all daemons are started again.
-	for _, mpsDaemon := range mpsDaemons {
-		if err := mpsDaemon.Start(); err != nil {
-			klog.Errorf("Failed to start MPS daemon: %v", err)
-			return mpsDaemons, true, nil
-		}
-	}
-	readyFile, err := os.Create("/mps/.ready")
-	if err != nil {
-		return mpsDaemons, true, fmt.Errorf("failed to create .ready file")
-	}
-	defer readyFile.Close()
-
-	return mpsDaemons, false, nil
-}
-
-func stopDaemons(mpsDaemons ...*mps.Daemon) error {
-	if err := os.Remove("/mps/.ready"); err != nil {
-		klog.Warningf("Failed to remove .ready file: %v", err)
-	}
-	klog.Info("Stopping MPS daemons.")
-	var errs error
-	for _, p := range mpsDaemons {
-		errs = errors.Join(errs, p.Stop())
-	}
-	return errs
-}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mount/mount-shm.go b/pkg/nvidia-plugin/mps-control-daemon/mount/mount-shm.go
deleted file mode 100644
index 83825e812..000000000
--- a/pkg/nvidia-plugin/mps-control-daemon/mount/mount-shm.go
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
-# Copyright 2024 NVIDIA CORPORATION
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-**/
-
-package mount
-
-import (
-	"bufio"
-	"fmt"
-	"os"
-	"os/exec"
-	"strconv"
-	"strings"
-
-	"github.com/urfave/cli/v2"
-	"k8s.io/klog/v2"
-	"k8s.io/mount-utils"
-)
-
-// NewCommand constructs a mount command.
-func NewCommand() *cli.Command {
-	c := cli.Command{
-		Name:   "mount-shm",
-		Usage:  "Set up the /dev/shm mount required by the MPS daemon",
-		Action: mountShm,
-	}
-
-	return &c
-}
-
-// mountShm creates a tmpfs mount at /mps/shm to be used by the mps control daemon.
-func mountShm(c *cli.Context) error {
-	mountExecutable, err := exec.LookPath("mount")
-	if err != nil {
-		return fmt.Errorf("error finding 'mount' executable: %w", err)
-	}
-	mounter := mount.New(mountExecutable)
-
-	// TODO: /mps should be configurable.
-	shmDir := "/mps/shm"
-	err = mount.CleanupMountPoint(shmDir, mounter, true)
-	if err != nil {
-		return fmt.Errorf("error unmounting %v: %w", shmDir, err)
-	}
-
-	if err := os.MkdirAll(shmDir, 0755); err != nil {
-		return fmt.Errorf("error creating directory %v: %w", shmDir, err)
-	}
-
-	sizeArg := fmt.Sprintf("size=%v", getDefaultShmSize())
-	mountOptions := []string{"rw", "nosuid", "nodev", "noexec", "relatime", sizeArg}
-	if err := mounter.Mount("shm", shmDir, "tmpfs", mountOptions); err != nil {
-		return fmt.Errorf("error mounting %v as tmpfs: %w", shmDir, err)
-	}
-
-	return nil
-}
-
-// getDefaultShmSize returns the default size for the tmpfs to be created.
-// This reads /proc/meminfo to get the total memory to calculate this. If this
-// fails a fallback size of 65536k is used.
-func getDefaultShmSize() string {
-	const fallbackSize = "65536k"
-
-	meminfo, err := os.Open("/proc/meminfo")
-	if err != nil {
-		klog.ErrorS(err, "failed to open /proc/meminfo")
-		return fallbackSize
-	}
-	defer func() {
-		_ = meminfo.Close()
-	}()
-
-	scanner := bufio.NewScanner(meminfo)
-	for scanner.Scan() {
-		line := scanner.Text()
-		if !strings.HasPrefix(line, "MemTotal:") {
-			continue
-		}
-
-		parts := strings.SplitN(strings.TrimSpace(strings.TrimPrefix(line, "MemTotal:")), " ", 2)
-		memTotal, err := strconv.Atoi(parts[0])
-		if err != nil {
-			klog.ErrorS(err, "could not convert MemTotal to an integer")
-			return fallbackSize
-		}
-
-		var unit string
-		if len(parts) == 2 {
-			unit = string(parts[1][0])
-		}
-
-		return fmt.Sprintf("%d%s", memTotal/2, unit)
-	}
-	return fallbackSize
-}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/daemon.go b/pkg/nvidia-plugin/mps-control-daemon/mps/daemon.go
deleted file mode 100644
index 5d23c61ae..000000000
--- a/pkg/nvidia-plugin/mps-control-daemon/mps/daemon.go
+++ /dev/null
@@ -1,280 +0,0 @@
-/**
-# Copyright 2024 NVIDIA CORPORATION
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-**/
-
-package mps
-
-import (
-	"bytes"
-	"errors"
-	"fmt"
-	"io"
-	"os"
-	"os/exec"
-	"path/filepath"
-
-	"github.com/opencontainers/selinux/go-selinux"
-	"k8s.io/klog/v2"
-
-	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
-)
-
-type computeMode string
-
-const (
-	mpsControlBin = "nvidia-cuda-mps-control"
-
-	computeModeExclusiveProcess = computeMode("EXCLUSIVE_PROCESS")
-	computeModeDefault          = computeMode("DEFAULT")
-
-	unprivilegedContainerSELinuxLabel = "system_u:object_r:container_file_t:s0"
-)
-
-// Daemon represents an MPS daemon.
-// It is associated with a specific kubernets resource and is responsible for
-// starting and stopping the deamon as well as ensuring that the memory and
-// thread limits are set for the devices that the resource makes available.
-type Daemon struct {
-	rm rm.ResourceManager
-	// root represents the root at which the files and folders controlled by the
-	// daemon are created. These include the log and pipe directories.
-	root Root
-	// logTailer tails the MPS control daemon logs.
-	logTailer *tailer
-}
-
-// NewDaemon creates an MPS daemon instance.
-func NewDaemon(rm rm.ResourceManager, root Root) *Daemon {
-	return &Daemon{
-		rm:   rm,
-		root: root,
-	}
-}
-
-// Devices returns the list of devices under the control of this MPS daemon.
-func (d *Daemon) Devices() rm.Devices {
-	return d.rm.Devices()
-}
-
-type envvars map[string]string
-
-func (e envvars) toSlice() []string {
-	var envs []string
-	for k, v := range e {
-		envs = append(envs, k+"="+v)
-	}
-	return envs
-}
-
-// EnvVars returns the environment variables required for the daemon.
-// These should be passed to clients consuming the device shared using MPS.
-// TODO: Set CUDA_VISIBLE_DEVICES to include only the devices for this resource type.
-func (d *Daemon) EnvVars() envvars {
-	return map[string]string{
-		"CUDA_MPS_PIPE_DIRECTORY": d.PipeDir(),
-		"CUDA_MPS_LOG_DIRECTORY":  d.LogDir(),
-	}
-}
-
-// Start starts the MPS deamon as a background process.
-func (d *Daemon) Start() error {
-	if err := d.setComputeMode(computeModeExclusiveProcess); err != nil {
-		return fmt.Errorf("error setting compute mode %v: %w", computeModeExclusiveProcess, err)
-	}
-
-	klog.InfoS("Staring MPS daemon", "resource", d.rm.Resource())
-
-	pipeDir := d.PipeDir()
-	if err := os.MkdirAll(pipeDir, 0755); err != nil {
-		return fmt.Errorf("error creating directory %v: %w", pipeDir, err)
-	}
-
-	if err := setSELinuxContext(pipeDir, unprivilegedContainerSELinuxLabel); err != nil {
-		return fmt.Errorf("error setting SELinux context: %w", err)
-	}
-
-	logDir := d.LogDir()
-	if err := os.MkdirAll(logDir, 0755); err != nil {
-		return fmt.Errorf("error creating directory %v: %w", logDir, err)
-	}
-
-	mpsDaemon := exec.Command(mpsControlBin, "-d")
-	mpsDaemon.Env = append(mpsDaemon.Env, d.EnvVars().toSlice()...)
-	if err := mpsDaemon.Run(); err != nil {
-		return err
-	}
-
-	for index, limit := range d.perDevicePinnedDeviceMemoryLimits() {
-		_, err := d.EchoPipeToControl(fmt.Sprintf("set_default_device_pinned_mem_limit %s %s", index, limit))
-		if err != nil {
-			return fmt.Errorf("error setting pinned memory limit for device %v: %w", index, err)
-		}
-	}
-	if threadPercentage := d.activeThreadPercentage(); threadPercentage != "" {
-		_, err := d.EchoPipeToControl(fmt.Sprintf("set_default_active_thread_percentage %s", threadPercentage))
-		if err != nil {
-			return fmt.Errorf("error setting active thread percentage: %w", err)
-		}
-	}
-
-	statusFile, err := os.Create(d.startedFile())
-	if err != nil {
-		return err
-	}
-	defer statusFile.Close()
-
-	d.logTailer = newTailer(filepath.Join(logDir, "control.log"))
-	klog.InfoS("Starting log tailer", "resource", d.rm.Resource())
-	if err := d.logTailer.Start(); err != nil {
-		klog.ErrorS(err, "Could not start tail command on control.log; ignoring logs")
-	}
-
-	return nil
-}
-
-func setSELinuxContext(path string, context string) error {
-	_, err := os.Stat("/sys/fs/selinux")
-	if err != nil && errors.Is(err, os.ErrNotExist) {
-		klog.InfoS("SELinux disabled, not updating context", "path", path)
-		return nil
-	} else if err != nil {
-		return fmt.Errorf("error checking if SELinux is enabled: %w", err)
-	}
-
-	klog.InfoS("SELinux enabled, setting context", "path", path, "context", context)
-	return selinux.Chcon(path, context, true)
-}
-
-// Stop ensures that the MPS daemon is quit.
-func (d *Daemon) Stop() error {
-	_, err := d.EchoPipeToControl("quit")
-	if err != nil {
-		return fmt.Errorf("error sending quit message: %w", err)
-	}
-	klog.InfoS("Stopped MPS control daemon", "resource", d.rm.Resource())
-
-	err = d.logTailer.Stop()
-	klog.InfoS("Stopped log tailer", "resource", d.rm.Resource(), "error", err)
-
-	if err := d.setComputeMode(computeModeDefault); err != nil {
-		return fmt.Errorf("error setting compute mode %v: %w", computeModeDefault, err)
-	}
-
-	if err := os.Remove(d.startedFile()); err != nil && err != os.ErrNotExist {
-		return fmt.Errorf("failed to remove started file: %w", err)
-	}
-
-	logDir := d.LogDir()
-	if err := os.RemoveAll(logDir); err != nil {
-		klog.ErrorS(err, "Failed to remove pipe directory", "path", logDir)
-	}
-
-	return nil
-}
-
-func (d *Daemon) LogDir() string {
-	return d.root.LogDir(d.rm.Resource())
-}
-
-func (d *Daemon) PipeDir() string {
-	return d.root.PipeDir(d.rm.Resource())
-}
-
-func (d *Daemon) ShmDir() string {
-	return "/dev/shm"
-}
-
-func (d *Daemon) startedFile() string {
-	return d.root.startedFile(d.rm.Resource())
-}
-
-// AssertHealthy checks that the MPS control daemon is healthy.
-func (d *Daemon) AssertHealthy() error {
-	_, err := d.EchoPipeToControl("get_default_active_thread_percentage")
-	return err
-}
-
-// EchoPipeToControl sends the specified command to the MPS control daemon.
-func (d *Daemon) EchoPipeToControl(command string) (string, error) {
-	var out bytes.Buffer
-	reader, writer := io.Pipe()
-	defer writer.Close()
-	defer reader.Close()
-
-	mpsDaemon := exec.Command(mpsControlBin)
-	mpsDaemon.Env = append(mpsDaemon.Env, d.EnvVars().toSlice()...)
-
-	mpsDaemon.Stdin = reader
-	mpsDaemon.Stdout = &out
-
-	if err := mpsDaemon.Start(); err != nil {
-		return "", fmt.Errorf("failed to start NVIDIA MPS command: %w", err)
-	}
-
-	if _, err := writer.Write([]byte(command)); err != nil {
-		return "", fmt.Errorf("failed to write message to pipe: %w", err)
-	}
-	_ = writer.Close()
-
-	if err := mpsDaemon.Wait(); err != nil {
-		return "", fmt.Errorf("failed to send command to MPS daemon: %w", err)
-	}
-	return out.String(), nil
-}
-
-func (d *Daemon) setComputeMode(mode computeMode) error {
-	for _, uuid := range d.Devices().GetUUIDs() {
-		cmd := exec.Command(
-			"nvidia-smi",
-			"-i", uuid,
-			"-c", string(mode))
-		output, err := cmd.CombinedOutput()
-		if err != nil {
-			klog.Errorf("\n%v", string(output))
-			return fmt.Errorf("error running nvidia-smi: %w", err)
-		}
-	}
-	return nil
-}
-
-// perDevicePinnedMemoryLimits returns the pinned memory limits for each device.
-func (m *Daemon) perDevicePinnedDeviceMemoryLimits() map[string]string {
-	totalMemoryInBytesPerDevice := make(map[string]uint64)
-	replicasPerDevice := make(map[string]uint64)
-	for _, device := range m.Devices() {
-		index := device.Index
-		totalMemoryInBytesPerDevice[index] = device.TotalMemory
-		replicasPerDevice[index] += 1
-	}
-
-	limits := make(map[string]string)
-	for index, totalMemory := range totalMemoryInBytesPerDevice {
-		if totalMemory == 0 {
-			continue
-		}
-		replicas := replicasPerDevice[index]
-		limits[index] = fmt.Sprintf("%vM", totalMemory/replicas/1024/1024)
-	}
-	return limits
-}
-
-func (m *Daemon) activeThreadPercentage() string {
-	if len(m.Devices()) == 0 {
-		return ""
-	}
-	replicasPerDevice := len(m.Devices()) / len(m.Devices().GetUUIDs())
-
-	return fmt.Sprintf("%d", 100/replicasPerDevice)
-}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/device.go b/pkg/nvidia-plugin/mps-control-daemon/mps/device.go
deleted file mode 100644
index bd8b1bf3c..000000000
--- a/pkg/nvidia-plugin/mps-control-daemon/mps/device.go
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
-# Copyright 2024 NVIDIA CORPORATION
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-**/
-
-package mps
-
-import (
-	"errors"
-	"fmt"
-	"strings"
-
-	"golang.org/x/mod/semver"
-
-	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
-)
-
-var errInvalidDevice = errors.New("invalid device")
-
-// mpsDevice represents an MPS-specific alias for an rm.Device.
-type mpsDevice rm.Device
-
-// assertReplicas checks whether the number of replicas specified is valid.
-func (d *mpsDevice) assertReplicas() error {
-	maxClients := d.maxClients()
-	if d.Replicas > maxClients {
-		return fmt.Errorf("%w maximum allowed replicas exceeded: %d > %d", errInvalidDevice, d.Replicas, maxClients)
-	}
-	return nil
-}
-
-// maxClients returns the maximum number of clients supported by an MPS server.
-func (d *mpsDevice) maxClients() int {
-	if d.isAtLeastVolta() {
-		return 48
-	}
-	return 16
-}
-
-// isAtLeastVolta checks whether the specified device is a volta device or newer.
-func (d *mpsDevice) isAtLeastVolta() bool {
-	vCc := "v" + strings.TrimPrefix(d.ComputeCapability, "v")
-	return semver.Compare(semver.Canonical(vCc), semver.Canonical("v7.5")) >= 0
-}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/device_test.go b/pkg/nvidia-plugin/mps-control-daemon/mps/device_test.go
deleted file mode 100644
index 17cef28ea..000000000
--- a/pkg/nvidia-plugin/mps-control-daemon/mps/device_test.go
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
-# Copyright 2024 NVIDIA CORPORATION
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-**/
-
-package mps
-
-import (
-	"testing"
-
-	"github.com/stretchr/testify/require"
-)
-
-func TestDevice(t *testing.T) {
-	testCases := []struct {
-		description            string
-		input                  mpsDevice
-		expectedIsAtLeastVolta bool
-		expectedMaxClients     int
-		expectedAssertReplicas error
-	}{
-		{
-			description: "leading v ignored",
-			input: mpsDevice{
-				ComputeCapability: "v7.5",
-			},
-			expectedIsAtLeastVolta: true,
-			expectedMaxClients:     48,
-		},
-		{
-			description: "no-leading v supported",
-			input: mpsDevice{
-				ComputeCapability: "7.5",
-			},
-			expectedIsAtLeastVolta: true,
-			expectedMaxClients:     48,
-		},
-		{
-			description: "pre-volta clients",
-			input: mpsDevice{
-				ComputeCapability: "7.0",
-			},
-			expectedIsAtLeastVolta: false,
-			expectedMaxClients:     16,
-		},
-		{
-			description: "post-volta clients",
-			input: mpsDevice{
-				ComputeCapability: "9.0",
-			},
-			expectedIsAtLeastVolta: true,
-			expectedMaxClients:     48,
-		},
-		{
-			description: "pre-volta clients exceeded",
-			input: mpsDevice{
-				ComputeCapability: "7.0",
-				Replicas:          29,
-			},
-			expectedIsAtLeastVolta: false,
-			expectedMaxClients:     16,
-			expectedAssertReplicas: errInvalidDevice,
-		},
-		{
-			description: "post-volta clients exceeded",
-			input: mpsDevice{
-				ComputeCapability: "9.0",
-				Replicas:          49,
-			},
-			expectedIsAtLeastVolta: true,
-			expectedMaxClients:     48,
-			expectedAssertReplicas: errInvalidDevice,
-		},
-		{
-			description: "pre-volta clients max",
-			input: mpsDevice{
-				ComputeCapability: "7.0",
-				Replicas:          16,
-			},
-			expectedIsAtLeastVolta: false,
-			expectedMaxClients:     16,
-		},
-		{
-			description: "post-volta clients max",
-			input: mpsDevice{
-				ComputeCapability: "9.0",
-				Replicas:          48,
-			},
-			expectedIsAtLeastVolta: true,
-			expectedMaxClients:     48,
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.description, func(t *testing.T) {
-			require.Equal(t, tc.expectedIsAtLeastVolta, tc.input.isAtLeastVolta())
-			require.Equal(t, tc.expectedMaxClients, tc.input.maxClients())
-			require.ErrorIs(t, tc.input.assertReplicas(), tc.expectedAssertReplicas)
-		})
-	}
-}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/log-tailer.go b/pkg/nvidia-plugin/mps-control-daemon/mps/log-tailer.go
deleted file mode 100644
index d9fb87b84..000000000
--- a/pkg/nvidia-plugin/mps-control-daemon/mps/log-tailer.go
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
-# Copyright 2024 NVIDIA CORPORATION
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-**/
-
-package mps
-
-import (
-	"context"
-	"os"
-	"os/exec"
-)
-
-// tailer tails the contents of a file.
-type tailer struct {
-	filename string
-	cmd      *exec.Cmd
-	cancel   context.CancelFunc
-}
-
-// newTailer creates a tailer.
-func newTailer(filename string) *tailer {
-	return &tailer{
-		filename: filename,
-	}
-}
-
-// Start starts tailing the specified filename.
-func (t *tailer) Start() error {
-	ctx, cancel := context.WithCancel(context.Background())
-	t.cancel = cancel
-
-	//nolint:gosec // G204: Subprocess launched with a potential tainted input or cmd arguments (gosec)
-	cmd := exec.CommandContext(ctx, "tail", "-n", "+1", "-f", t.filename)
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-
-	if err := cmd.Start(); err != nil {
-		return err
-	}
-	t.cmd = cmd
-	return nil
-}
-
-// Stop stops the tailer.
-// The associated cancel function is called after which the command wait is
-// called -- if applicable.
-func (t *tailer) Stop() error {
-	if t.cancel != nil {
-		t.cancel()
-	}
-
-	if t.cmd == nil {
-		return nil
-	}
-
-	return t.cmd.Wait()
-}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/manager.go b/pkg/nvidia-plugin/mps-control-daemon/mps/manager.go
deleted file mode 100644
index 719a358e6..000000000
--- a/pkg/nvidia-plugin/mps-control-daemon/mps/manager.go
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
-# Copyright 2024 NVIDIA CORPORATION
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-**/
-
-package mps
-
-import (
-	"fmt"
-
-	"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
-	"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
-	"github.com/NVIDIA/go-nvml/pkg/nvml"
-	"k8s.io/klog/v2"
-
-	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
-	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
-	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
-)
-
-type Manager interface {
-	Daemons() ([]*Daemon, error)
-}
-
-type manager struct {
-	infolib   info.Interface
-	nvmllib   nvml.Interface
-	devicelib device.Interface
-	config    *nvidia.DeviceConfig
-}
-
-type nullManager struct{}
-
-// Daemons creates the required set of MPS daemons for the specified options.
-func NewDaemons(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, opts ...Option) ([]*Daemon, error) {
-	manager, err := New(infolib, nvmllib, devicelib, opts...)
-	if err != nil {
-		return nil, fmt.Errorf("failed to create MPS manager: %w", err)
-	}
-	return manager.Daemons()
-}
-
-// New creates a manager for MPS daemons.
-// If MPS is not configured, a manager is returned that manages no daemons.
-func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, opts ...Option) (Manager, error) {
-	m := &manager{
-		infolib:   infolib,
-		nvmllib:   nvmllib,
-		devicelib: devicelib,
-	}
-	for _, opt := range opts {
-		opt(m)
-	}
-
-	if strategy := m.config.Sharing.SharingStrategy(); strategy != spec.SharingStrategyMPS {
-		klog.InfoS("Sharing strategy is not MPS; skipping MPS manager creation", "strategy", strategy)
-		return &nullManager{}, nil
-	}
-
-	return m, nil
-}
-
-func (m *manager) Daemons() ([]*Daemon, error) {
-	resourceManagers, err := rm.NewNVMLResourceManagers(m.infolib, m.nvmllib, m.devicelib, m.config)
-	if err != nil {
-		return nil, err
-	}
-	var daemons []*Daemon
-	for _, resourceManager := range resourceManagers {
-		// We don't create daemons if there are no devices associated with the resource manager.
-		if len(resourceManager.Devices()) == 0 {
-			klog.InfoS("No devices associated with resource", "resource", resourceManager.Resource())
-			continue
-		}
-		// Check if the resources are shared.
-		// TODO: We should add a more explicit check for MPS specifically
-		if !rm.AnnotatedIDs(resourceManager.Devices().GetIDs()).AnyHasAnnotations() {
-			klog.InfoS("Resource is not shared", "resource", "resource", resourceManager.Resource())
-			continue
-		}
-		// Check if MIG devices are included.
-		for _, rmDevice := range resourceManager.Devices() {
-			if rmDevice.IsMigDevice() {
-				klog.Warning("MPS sharing is not supported for MIG devices; skipping daemon creation")
-				continue
-			}
-			if err := (*mpsDevice)(rmDevice).assertReplicas(); err != nil {
-				return nil, fmt.Errorf("invalid MPS configuration: %w", err)
-			}
-		}
-		daemon := NewDaemon(resourceManager, ContainerRoot)
-		daemons = append(daemons, daemon)
-	}
-
-	return daemons, nil
-}
-
-// Daemons always returns an empty slice for a nullManager.
-func (m *nullManager) Daemons() ([]*Daemon, error) {
-	return nil, nil
-}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/options.go b/pkg/nvidia-plugin/mps-control-daemon/mps/options.go
deleted file mode 100644
index ca97d122f..000000000
--- a/pkg/nvidia-plugin/mps-control-daemon/mps/options.go
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
-# Copyright 2024 NVIDIA CORPORATION
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-**/
-
-package mps
-
-import "github.com/Project-HAMi/HAMi/pkg/device/nvidia"
-
-// Option defines a functional option for configuring an MPS manager.
-type Option func(*manager)
-
-// WithConfig sets the config associated with the MPS manager.
-func WithConfig(config *nvidia.DeviceConfig) Option {
-	return func(m *manager) {
-		m.config = config
-	}
-}
diff --git a/pkg/nvidia-plugin/mps-control-daemon/mps/root.go b/pkg/nvidia-plugin/mps-control-daemon/mps/root.go
deleted file mode 100644
index 9c2e105f8..000000000
--- a/pkg/nvidia-plugin/mps-control-daemon/mps/root.go
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
-# Copyright 2024 NVIDIA CORPORATION
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-**/
-
-package mps
-
-import (
-	"path/filepath"
-
-	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
-)
-
-const (
-	ContainerRoot = Root("/mps")
-)
-
-// Root represents an MPS root.
-// This is where per-resource pipe and log directories are created.
-// For containerised applications the host root is typically mounted to /mps in the container.
-type Root string
-
-// LogDir returns the per-resource pipe dir for the specified root.
-func (r Root) LogDir(resourceName spec.ResourceName) string {
-	return r.Path(string(resourceName), "log")
-}
-
-// PipeDir returns the per-resource pipe dir for the specified root.
-func (r Root) PipeDir(resourceName spec.ResourceName) string {
-	return r.Path(string(resourceName), "pipe")
-}
-
-// ShmDir returns the shm dir associated with the root.
-// Note that the shm dir is the same for all resources.
-func (r Root) ShmDir(resourceName spec.ResourceName) string {
-	return r.Path("shm")
-}
-
-// startedFile returns the per-resource .started file name for the specified root.
-func (r Root) startedFile(resourceName spec.ResourceName) string {
-	return r.Path(string(resourceName), ".started")
-}
-
-// Path returns a path relative to the MPS root.
-func (r Root) Path(parts ...string) string {
-	pathparts := append([]string{string(r)}, parts...)
-	return filepath.Join(pathparts...)
-}
diff --git a/pkg/nvidia-plugin/pkg/plugin/mps.go b/pkg/nvidia-plugin/pkg/plugin/mps.go
deleted file mode 100644
index c4b304f07..000000000
--- a/pkg/nvidia-plugin/pkg/plugin/mps.go
+++ /dev/null
@@ -1,91 +0,0 @@
-/**
-# Copyright 2024 NVIDIA CORPORATION
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-**/
-
-package plugin
-
-import (
-	"errors"
-	"fmt"
-
-	"k8s.io/klog/v2"
-	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
-
-	spec "github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/api/config/v1"
-	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/mps-control-daemon/mps"
-	"github.com/Project-HAMi/HAMi/pkg/nvidia-plugin/pkg/rm"
-)
-
-type mpsOptions struct {
-	enabled      bool
-	resourceName spec.ResourceName
-	daemon       *mps.Daemon
-	hostRoot     mps.Root
-}
-
-// getMPSOptions returns the MPS options specified for the resource manager.
-// If MPS is not configured and empty set of options is returned.
-func (o *options) getMPSOptions(resourceManager rm.ResourceManager) (mpsOptions, error) {
-	if o.config.Sharing.SharingStrategy() != spec.SharingStrategyMPS {
-		return mpsOptions{}, nil
-	}
-
-	// TODO: It might make sense to pull this logic into a resource manager.
-	for _, device := range resourceManager.Devices() {
-		if device.IsMigDevice() {
-			return mpsOptions{}, errors.New("sharing using MPS is not supported for MIG devices")
-		}
-	}
-
-	m := mpsOptions{
-		enabled:      true,
-		resourceName: resourceManager.Resource(),
-		daemon:       mps.NewDaemon(resourceManager, mps.ContainerRoot),
-		hostRoot:     mps.Root(*o.config.Flags.CommandLineFlags.MpsRoot),
-	}
-	return m, nil
-}
-
-func (m *mpsOptions) waitForDaemon() error {
-	if m == nil || !m.enabled {
-		return nil
-	}
-	// TODO: Check the .ready file here.
-	// TODO: Have some retry strategy here.
-	if err := m.daemon.AssertHealthy(); err != nil {
-		return fmt.Errorf("error checking MPS daemon health: %w", err)
-	}
-	klog.InfoS("MPS daemon is healthy", "resource", m.resourceName)
-	return nil
-}
-
-func (m *mpsOptions) updateReponse(response *pluginapi.ContainerAllocateResponse) {
-	if m == nil || !m.enabled {
-		return
-	}
-	// TODO: We should check that the deviceIDs are shared using MPS.
-	response.Envs["CUDA_MPS_PIPE_DIRECTORY"] = m.daemon.PipeDir()
-
-	response.Mounts = append(response.Mounts,
-		&pluginapi.Mount{
-			ContainerPath: m.daemon.PipeDir(),
-			HostPath:      m.hostRoot.PipeDir(m.resourceName),
-		},
-		&pluginapi.Mount{
-			ContainerPath: m.daemon.ShmDir(),
-			HostPath:      m.hostRoot.ShmDir(m.resourceName),
-		},
-	)
-}
diff --git a/pkg/nvidia-plugin/pkg/plugin/server.go b/pkg/nvidia-plugin/pkg/plugin/server.go
index af8c3843f..8f562875d 100644
--- a/pkg/nvidia-plugin/pkg/plugin/server.go
+++ b/pkg/nvidia-plugin/pkg/plugin/server.go
@@ -80,8 +80,6 @@ type NvidiaDevicePlugin struct {
 
 	imexChannels imex.Channels
 
-	mps mpsOptions
-
 	operatingMode   string
 	migCurrent      nvidia.MigPartedSpec
 	schedulerConfig nvidia.NvidiaConfig
@@ -89,10 +87,6 @@ type NvidiaDevicePlugin struct {
 
 // devicePluginForResource creates a device plugin for the specified resource.
 func (o *options) devicePluginForResource(resourceManager rm.ResourceManager) (Interface, error) {
-	mpsOptions, err := o.getMPSOptions(resourceManager)
-	if err != nil {
-		return nil, err
-	}
 	sConfig, mode, err := LoadNvidiaDevicePluginConfig()
 	if err != nil {
 		return nil, fmt.Errorf("failed to load nvidia plugin config: %v", err)
@@ -113,8 +107,6 @@ func (o *options) devicePluginForResource(resourceManager rm.ResourceManager) (I
 
 		imexChannels: o.imexChannels,
 
-		mps: mpsOptions,
-
 		socket: getPluginSocketPath(resourceManager.Resource()),
 		// These will be reinitialized every
 		// time the plugin server is restarted.
@@ -208,10 +200,6 @@ func (plugin *NvidiaDevicePlugin) Devices() rm.Devices {
 func (plugin *NvidiaDevicePlugin) Start(kubeletSocket string) error {
 	plugin.initialize()
 
-	if err := plugin.mps.waitForDaemon(); err != nil {
-		return fmt.Errorf("error waiting for MPS daemon: %w", err)
-	}
-
 	err := plugin.Serve()
 	if err != nil {
 		klog.Errorf("Could not start device plugin for '%s': %s", plugin.rm.Resource(), err)
@@ -251,7 +239,6 @@ func (plugin *NvidiaDevicePlugin) Start(kubeletSocket string) error {
 	}
 
 	go func() {
-		// TODO: add MPS health check
 		err := plugin.rm.CheckHealth(plugin.stop, plugin.health)
 		if err != nil {
 			klog.Errorf("Failed to start health check: %v; continuing with health checks disabled", err)
@@ -410,14 +397,16 @@ func (plugin *NvidiaDevicePlugin) GetPreferredAllocation(ctx context.Context, r
 
 // Allocate which return list of devices.
 func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
-	responses := pluginapi.AllocateResponse{}
 
+	klog.InfoS("Allocate", "request", reqs)
+	responses := pluginapi.AllocateResponse{}
 	nodeName := os.Getenv(util.NodeNameEnvName)
+	klog.Infof("Allocate request on node %s", nodeName)
 	current, err := util.GetPendingPod(ctx, nodeName)
 	if err != nil {
 		return &responses, err
 	}
-
+	klog.Infof("Allocate pod name is %s/%s, annotation is %+v", current.Namespace, current.Name, current.Annotations)
 	for idx, req := range reqs.ContainerRequests {
 		if err := plugin.rm.ValidateRequest(req.DevicesIDs); err != nil {
 			return nil, fmt.Errorf("invalid allocation request for %q: %w", plugin.rm.Resource(), err)
@@ -509,7 +498,8 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.
 		}
 		responses.ContainerResponses = append(responses.ContainerResponses, response)
 	}
-
+	klog.Infof("Final allocate response: %v", responses)
+	device.PodAllocationTrySuccess(nodeName, nvidia.NvidiaGPUDevice, NodeLockNvidia, current)
 	return &responses, nil
 }
 
@@ -526,23 +516,19 @@ func (plugin *NvidiaDevicePlugin) getAllocateResponse(requestIds []string) (*plu
 			return nil, fmt.Errorf("failed to get allocate response for CDI: %v", err)
 		}
 	}
-	if plugin.mps.enabled {
-		plugin.updateResponseForMPS(response)
-	}
-
 	// The following modifications are only made if at least one non-CDI device
 	// list strategy is selected.
 	if plugin.deviceListStrategies.AllCDIEnabled() {
 		return response, nil
 	}
 
-	if plugin.deviceListStrategies.Includes(spec.DeviceListStrategyEnvVar) {
-		plugin.updateResponseForDeviceListEnvVar(response, deviceIDs...)
-		plugin.updateResponseForImexChannelsEnvVar(response)
-	}
-	if plugin.deviceListStrategies.Includes(spec.DeviceListStrategyVolumeMounts) {
-		plugin.updateResponseForDeviceMounts(response, deviceIDs...)
-	}
+	// if plugin.deviceListStrategies.Includes(spec.DeviceListStrategyEnvVar) {
+	// 	plugin.updateResponseForDeviceListEnvVar(response, deviceIDs...)
+	// 	plugin.updateResponseForImexChannelsEnvVar(response)
+	// }
+	// if plugin.deviceListStrategies.Includes(spec.DeviceListStrategyVolumeMounts) {
+	// 	plugin.updateResponseForDeviceMounts(response, deviceIDs...)
+	// }
 	if *plugin.config.Flags.Plugin.PassDeviceSpecs {
 		response.Devices = append(response.Devices, plugin.apiDeviceSpecs(*plugin.config.Flags.NvidiaDevRoot, requestIds)...)
 	}
@@ -555,13 +541,6 @@ func (plugin *NvidiaDevicePlugin) getAllocateResponse(requestIds []string) (*plu
 	return response, nil
 }
 
-// updateResponseForMPS ensures that the ContainerAllocate response contains the information required to use MPS.
-// This includes per-resource pipe and log directories as well as a global daemon-specific shm
-// and assumes that an MPS control daemon has already been started.
-func (plugin NvidiaDevicePlugin) updateResponseForMPS(response *pluginapi.ContainerAllocateResponse) {
-	plugin.mps.updateReponse(response)
-}
-
 // updateResponseForCDI updates the specified response for the given device IDs.
 // This response contains the annotations required to trigger CDI injection in the container engine or nvidia-container-runtime.
 func (plugin *NvidiaDevicePlugin) updateResponseForCDI(response *pluginapi.ContainerAllocateResponse, responseID string, deviceIDs ...string) error {
diff --git a/pkg/nvidia-plugin/pkg/rm/rm.go b/pkg/nvidia-plugin/pkg/rm/rm.go
index 5267b60b1..4747a546d 100644
--- a/pkg/nvidia-plugin/pkg/rm/rm.go
+++ b/pkg/nvidia-plugin/pkg/rm/rm.go
@@ -95,11 +95,15 @@ func (r *resourceManager) ValidateRequest(ids AnnotatedIDs) error {
 }
 
 // AddDefaultResourcesToConfig adds default resource matching rules to config.Resources
-func AddDefaultResourcesToConfig(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, config *spec.Config) error {
-	_ = config.Resources.AddGPUResource("*", "gpu")
-	if config.Flags.MigStrategy == nil {
-		return nil
-	}
+func AddDefaultResourcesToConfig(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, config *nvidia.DeviceConfig) error {
+	// _ = config.Resources.AddGPUResource("*", "gpu")
+	// if config.Flags.MigStrategy == nil {
+	// 	return nil
+	// }
+	config.Resources.GPUs = append(config.Resources.GPUs, spec.Resource{
+		Pattern: "*",
+		Name:    spec.ResourceName(*config.ResourceName),
+	})
 	switch *config.Flags.MigStrategy {
 	case spec.MigStrategySingle:
 		return config.Resources.AddMIGResource("*", "gpu")

From 0f895a109a28b8495c081b3ebf388a0b86032c25 Mon Sep 17 00:00:00 2001
From: haitwang-cloud <haitao_wht@outlook.com>
Date: Wed, 2 Apr 2025 18:02:07 +0800
Subject: [PATCH 4/5] refactor: remove unnecessary logging statements and
 improve device processing logs

---
 pkg/device/devices.go                    |  23 ++++-
 pkg/nvidia-plugin/pkg/plugin/register.go |   7 +-
 pkg/nvidia-plugin/pkg/plugin/server.go   | 105 +++++++++++++++--------
 pkg/nvidia-plugin/pkg/plugin/util.go     |   1 -
 pkg/nvidia-plugin/pkg/rm/devices.go      |  18 +++-
 pkg/util/util.go                         |   2 -
 6 files changed, 108 insertions(+), 48 deletions(-)

diff --git a/pkg/device/devices.go b/pkg/device/devices.go
index 019bd5d63..a40cb72b3 100644
--- a/pkg/device/devices.go
+++ b/pkg/device/devices.go
@@ -319,19 +319,34 @@ vnpus:
 }
 
 func PodAllocationTrySuccess(nodeName string, devName string, lockName string, pod *corev1.Pod) {
+	// Log: starting the allocation-success attempt.
+	klog.InfoS("Starting PodAllocationTrySuccess", "nodeName", nodeName, "deviceName", devName, "lockName", lockName, "namespace", pod.Namespace, "podName", pod.Name)
+
+	// Fetch the latest Pod object.
 	refreshed, err := client.GetClient().CoreV1().Pods(pod.Namespace).Get(context.Background(), pod.Name, metav1.GetOptions{})
 	if err != nil {
-		klog.Errorf("Error getting pod %s/%s: %v", pod.Namespace, pod.Name, err)
+		klog.ErrorS(err, "Failed to get refreshed pod", "nodeName", nodeName, "namespace", pod.Namespace, "podName", pod.Name)
+		return
+	}
+
+	// Look up the device-related annotation on the refreshed Pod.
+	annos, exists := refreshed.Annotations[util.InRequestDevices[devName]]
+	if !exists {
+		klog.Warningf("Annotation for device %s not found in pod %s/%s", devName, pod.Namespace, pod.Name)
 		return
 	}
-	annos := refreshed.Annotations[util.InRequestDevices[devName]]
-	klog.Infof("Trying allocation success: %s", annos)
+	klog.InfoS("Processing allocation success", "annotations", annos, "namespace", pod.Namespace, "podName", pod.Name)
+
+	// Check whether any devices are still waiting to be handled.
 	for _, val := range DevicesToHandle {
 		if strings.Contains(annos, val) {
+			klog.Infof("Device %s still pending allocation for pod %s/%s", val, pod.Namespace, pod.Name)
 			return
 		}
 	}
-	klog.Infof("All devices allocate success, releasing lock")
+
+	// All devices have been allocated successfully, so release the lock.
+	klog.InfoS("All devices allocated successfully, releasing lock", "namespace", pod.Namespace, "podName", pod.Name)
 	PodAllocationSuccess(nodeName, pod, lockName)
 }
 
diff --git a/pkg/nvidia-plugin/pkg/plugin/register.go b/pkg/nvidia-plugin/pkg/plugin/register.go
index c78141ea9..10ce458f0 100644
--- a/pkg/nvidia-plugin/pkg/plugin/register.go
+++ b/pkg/nvidia-plugin/pkg/plugin/register.go
@@ -141,7 +141,6 @@ func (plugin *NvidiaDevicePlugin) getAPIDevices() *[]*util.DeviceInfo {
 		if plugin.schedulerConfig.DeviceMemoryScaling != 1 {
 			registeredmem = int32(float64(registeredmem) * plugin.schedulerConfig.DeviceMemoryScaling)
 		}
-		klog.Infoln("MemoryScaling=", plugin.schedulerConfig.DeviceMemoryScaling, "registeredmem=", registeredmem)
 		health := true
 		for _, val := range devs {
 			if strings.Compare(val.ID, UUID) == 0 {
@@ -170,7 +169,6 @@ func (plugin *NvidiaDevicePlugin) getAPIDevices() *[]*util.DeviceInfo {
 			Mode:    plugin.operatingMode,
 			Health:  health,
 		})
-		klog.Infof("nvml registered device id=%v, memory=%v, type=%v, numa=%v", idx, registeredmem, Model, numa)
 	}
 	return &res
 }
@@ -187,7 +185,6 @@ func (plugin *NvidiaDevicePlugin) RegistrInAnnotation() error {
 	encodeddevices := util.EncodeNodeDevices(*devices)
 	annos[nvidia.HandshakeAnnos] = "Reported " + time.Now().String()
 	annos[nvidia.RegisterAnnos] = encodeddevices
-	klog.Infof("patch node with the following annos %v", fmt.Sprintf("%v", annos))
 	err = util.PatchNodeAnnotations(node, annos)
 
 	if err != nil {
@@ -203,11 +200,9 @@ func (plugin *NvidiaDevicePlugin) WatchAndRegister() {
 	for {
 		err := plugin.RegistrInAnnotation()
 		if err != nil {
-			klog.Errorf("Failed to register annotation: %v", err)
-			klog.Infof("Retrying in %v seconds...", errorSleepInterval)
+			klog.Errorf("Failed to register annotation: %v. Retrying in %v...", err, errorSleepInterval)
 			time.Sleep(errorSleepInterval)
 		} else {
-			klog.Infof("Successfully registered annotation. Next check in %v seconds...", successSleepInterval)
 			time.Sleep(successSleepInterval)
 		}
 	}
diff --git a/pkg/nvidia-plugin/pkg/plugin/server.go b/pkg/nvidia-plugin/pkg/plugin/server.go
index 8f562875d..94b3cfd14 100644
--- a/pkg/nvidia-plugin/pkg/plugin/server.go
+++ b/pkg/nvidia-plugin/pkg/plugin/server.go
@@ -337,7 +337,7 @@ func (plugin *NvidiaDevicePlugin) Register(kubeletSocket string) error {
 		Endpoint:     path.Base(plugin.socket),
 		ResourceName: string(plugin.rm.Resource()),
 		Options: &pluginapi.DevicePluginOptions{
-			GetPreferredAllocationAvailable: true,
+			GetPreferredAllocationAvailable: false,
 		},
 	}
 
@@ -351,7 +351,7 @@ func (plugin *NvidiaDevicePlugin) Register(kubeletSocket string) error {
 // GetDevicePluginOptions returns the values of the optional settings for this plugin
 func (plugin *NvidiaDevicePlugin) GetDevicePluginOptions(context.Context, *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) {
 	options := &pluginapi.DevicePluginOptions{
-		GetPreferredAllocationAvailable: true,
+		GetPreferredAllocationAvailable: false,
 	}
 	return options, nil
 }
@@ -397,42 +397,60 @@ func (plugin *NvidiaDevicePlugin) GetPreferredAllocation(ctx context.Context, r
 
 // Allocate which return list of devices.
 func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
+	// Log: function entry.
+	klog.InfoS("Allocate function started", "request", reqs)
 
-	klog.InfoS("Allocate", "request", reqs)
 	responses := pluginapi.AllocateResponse{}
 	nodeName := os.Getenv(util.NodeNameEnvName)
-	klog.Infof("Allocate request on node %s", nodeName)
+	klog.InfoS("Processing allocate request on node", "nodeName", nodeName)
+
+	// Get the Pod that is currently pending on this node.
 	current, err := util.GetPendingPod(ctx, nodeName)
 	if err != nil {
+		klog.ErrorS(err, "Failed to get pending pod", "nodeName", nodeName)
 		return &responses, err
 	}
-	klog.Infof("Allocate pod name is %s/%s, annotation is %+v", current.Namespace, current.Name, current.Annotations)
-	for idx, req := range reqs.ContainerRequests {
-		if err := plugin.rm.ValidateRequest(req.DevicesIDs); err != nil {
-			return nil, fmt.Errorf("invalid allocation request for %q: %w", plugin.rm.Resource(), err)
-		}
+	klog.InfoS("Processing allocate request for pod", "namespace", current.Namespace, "name", current.Name, "annotations", current.Annotations)
+
+	for idx, _ := range reqs.ContainerRequests {
+		containerIndex := idx + 1
+		klog.InfoS("Processing container request", "containerIndex", containerIndex, "totalContainers", len(reqs.ContainerRequests), "namespace", current.Namespace, "podName", current.Name)
+
+		//if err := plugin.rm.ValidateRequest(req.DevicesIDs); err != nil {
+		//	klog.ErrorS(err, "Invalid allocation request", "resource", plugin.rm.Resource(), "devicesIDs", req.DevicesIDs, "namespace", current.Namespace, "podName", current.Name)
+		//	return nil, fmt.Errorf("invalid allocation request for %q: %w", plugin.rm.Resource(), err)
+		//}
+
 		currentCtr, devreq, err := GetNextDeviceRequest(nvidia.NvidiaGPUDevice, *current)
-		klog.Infoln("deviceAllocateFromAnnotation=", devreq)
 		if err != nil {
+			klog.ErrorS(err, "Failed to get next device request", "nodeName", nodeName, "namespace", current.Namespace, "podName", current.Name)
 			device.PodAllocationFailed(nodeName, current, NodeLockNvidia)
 			return &responses, err
 		}
+
 		if len(devreq) != len(reqs.ContainerRequests[idx].DevicesIDs) {
+			err := errors.New("device number not matched")
+			klog.ErrorS(err, "Device number mismatch", "expected", len(reqs.ContainerRequests[idx].DevicesIDs), "got", len(devreq), "namespace", current.Namespace, "podName", current.Name)
 			device.PodAllocationFailed(nodeName, current, NodeLockNvidia)
-			return &responses, errors.New("device number not matched")
+			return &responses, err
 		}
+
 		response, err := plugin.getAllocateResponse(plugin.GetContainerDeviceStrArray(devreq))
 		if err != nil {
+			klog.ErrorS(err, "Failed to get allocate response", "namespace", current.Namespace, "podName", current.Name)
 			return nil, fmt.Errorf("failed to get allocate response: %v", err)
 		}
 
-		err = EraseNextDeviceTypeFromAnnotation(nvidia.NvidiaGPUDevice, *current)
-		if err != nil {
+		if err := EraseNextDeviceTypeFromAnnotation(nvidia.NvidiaGPUDevice, *current); err != nil {
+			klog.ErrorS(err, "Failed to erase next device type from annotation", "namespace", current.Namespace, "podName", current.Name)
 			device.PodAllocationFailed(nodeName, current, NodeLockNvidia)
 			return &responses, err
 		}
 
 		if plugin.operatingMode != "mig" {
+			klog.InfoS("Starting to allocate devices for pod", "namespace", current.Namespace, "podName", current.Name)
+
+			response.Envs["NVIDIA_VISIBLE_DEVICES"] = strings.Join(reqs.ContainerRequests[idx].DevicesIDs, ",")
 			for i, dev := range devreq {
 				limitKey := fmt.Sprintf("CUDA_DEVICE_MEMORY_LIMIT_%v", i)
 				response.Envs[limitKey] = fmt.Sprintf("%vm", dev.Usedmem)
@@ -445,43 +463,53 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.
 			if plugin.schedulerConfig.DisableCoreLimit {
 				response.Envs[util.CoreLimitSwitch] = "disable"
 			}
+
 			cacheFileHostDirectory := fmt.Sprintf("%s/vgpu/containers/%s_%s", hostHookPath, current.UID, currentCtr.Name)
 			os.RemoveAll(cacheFileHostDirectory)
-
+			klog.InfoS("Creating cache file host directory for pod", "namespace", current.Namespace, "podName", current.Name)
 			os.MkdirAll(cacheFileHostDirectory, 0777)
 			os.Chmod(cacheFileHostDirectory, 0777)
 			os.MkdirAll("/tmp/vgpulock", 0777)
 			os.Chmod("/tmp/vgpulock", 0777)
+
 			response.Mounts = append(response.Mounts,
-				&pluginapi.Mount{ContainerPath: fmt.Sprintf("%s/vgpu/libvgpu.so", hostHookPath),
-					HostPath: GetLibPath(),
-					ReadOnly: true},
-				&pluginapi.Mount{ContainerPath: fmt.Sprintf("%s/vgpu", hostHookPath),
-					HostPath: cacheFileHostDirectory,
-					ReadOnly: false},
-				&pluginapi.Mount{ContainerPath: "/tmp/vgpulock",
-					HostPath: "/tmp/vgpulock",
-					ReadOnly: false},
+				&pluginapi.Mount{
+					ContainerPath: fmt.Sprintf("%s/vgpu/libvgpu.so", hostHookPath),
+					HostPath:      GetLibPath(),
+					ReadOnly:      true,
+				},
+				&pluginapi.Mount{
+					ContainerPath: fmt.Sprintf("%s/vgpu", hostHookPath),
+					HostPath:      cacheFileHostDirectory,
+					ReadOnly:      false,
+				},
+				&pluginapi.Mount{
+					ContainerPath: "/tmp/vgpulock",
+					HostPath:      "/tmp/vgpulock",
+					ReadOnly:      false,
+				},
 			)
+
+			// 检查 CUDA_DISABLE_CONTROL 环境变量是否存在
 			found := false
 			for _, val := range currentCtr.Env {
 				if strings.Compare(val.Name, "CUDA_DISABLE_CONTROL") == 0 {
-					// if env existed but is set to false or can not be parsed, ignore
 					t, _ := strconv.ParseBool(val.Value)
-					if !t {
-						continue
+					if t {
+						found = true
+						break
 					}
-					// only env existed and set to true, we mark it "found"
-					found = true
-					break
 				}
 			}
 			if !found {
-				response.Mounts = append(response.Mounts, &pluginapi.Mount{ContainerPath: "/etc/ld.so.preload",
-					HostPath: hostHookPath + "/vgpu/ld.so.preload",
-					ReadOnly: true},
-				)
+				response.Mounts = append(response.Mounts, &pluginapi.Mount{
+					ContainerPath: "/etc/ld.so.preload",
+					HostPath:      hostHookPath + "/vgpu/ld.so.preload",
+					ReadOnly:      true,
+				})
 			}
+
+			// 检查许可证文件是否存在
 			_, err = os.Stat(fmt.Sprintf("%s/vgpu/license", hostHookPath))
 			if err == nil {
 				response.Mounts = append(response.Mounts, &pluginapi.Mount{
@@ -496,10 +524,19 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.
 				})
 			}
 		}
+
+		// 将容器响应追加到最终响应中
+		klog.InfoS("Appending container response", "containerIndex", containerIndex, "totalContainers", len(reqs.ContainerRequests), "namespace", current.Namespace, "podName", current.Name)
 		responses.ContainerResponses = append(responses.ContainerResponses, response)
 	}
-	klog.Infof("Final allocate response: %v", responses)
+
+	// 日志：最终分配响应
+	klog.InfoS("Final allocate response generated", "response", responses)
+
+	// 标记 Pod 分配成功
 	device.PodAllocationTrySuccess(nodeName, nvidia.NvidiaGPUDevice, NodeLockNvidia, current)
+	klog.InfoS("Allocate function completed successfully", "response", responses)
+
 	return &responses, nil
 }
 
diff --git a/pkg/nvidia-plugin/pkg/plugin/util.go b/pkg/nvidia-plugin/pkg/plugin/util.go
index 2b16900b3..3e2e248a9 100644
--- a/pkg/nvidia-plugin/pkg/plugin/util.go
+++ b/pkg/nvidia-plugin/pkg/plugin/util.go
@@ -85,7 +85,6 @@ func EraseNextDeviceTypeFromAnnotation(dtype string, p corev1.Pod) error {
 			}
 		}
 	}
-	klog.Infoln("After erase res=", res)
 	newannos := make(map[string]string)
 	newannos[util.InRequestDevices[dtype]] = util.EncodePodSingleDevice(res)
 	return util.PatchPodAnnotations(&p, newannos)
diff --git a/pkg/nvidia-plugin/pkg/rm/devices.go b/pkg/nvidia-plugin/pkg/rm/devices.go
index 150c9a19b..ac1af5cb7 100644
--- a/pkg/nvidia-plugin/pkg/rm/devices.go
+++ b/pkg/nvidia-plugin/pkg/rm/devices.go
@@ -21,6 +21,7 @@ import (
 	"strconv"
 	"strings"
 
+	"k8s.io/klog/v2"
 	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
 )
 
@@ -177,10 +178,21 @@ func (ds Devices) GetUUIDs() []string {
 // GetPluginDevices returns the plugin Devices from all devices in the Devices
 func (ds Devices) GetPluginDevices(count uint) []*pluginapi.Device {
 	var res []*pluginapi.Device
-	if !strings.Contains(ds.GetIDs()[0], "MIG") {
+
+	// Log the IDs of all devices
+	ids := ds.GetIDs()
+	if len(ids) == 0 {
+		klog.Warning("No devices found in Devices map")
+		return res
+	}
+	klog.InfoS("Processing devices", "deviceIDs", ids, "count", count)
+
+	if !strings.Contains(ids[0], "MIG") {
+		klog.Info("Devices are not MIG-enabled, generating plugin devices with replicas")
 		for _, dev := range ds {
 			for i := uint(0); i < count; i++ {
 				id := fmt.Sprintf("%v-%v", dev.ID, i)
+				klog.InfoS("Adding device", "deviceID", id, "health", dev.Health)
 				res = append(res, &pluginapi.Device{
 					ID:       id,
 					Health:   dev.Health,
@@ -189,11 +201,15 @@ func (ds Devices) GetPluginDevices(count uint) []*pluginapi.Device {
 			}
 		}
 	} else {
+		klog.Info("Devices are MIG-enabled, adding directly")
 		for _, device := range ds {
 			d := device
+			klog.InfoS("Adding MIG device", "deviceID", d.ID, "health", d.Health)
 			res = append(res, &d.Device)
 		}
 	}
+
+	klog.InfoS("Finished processing devices", "totalDevices", len(res))
 	return res
 }
 
diff --git a/pkg/util/util.go b/pkg/util/util.go
index 4a4bb2dba..fbd3e69c6 100644
--- a/pkg/util/util.go
+++ b/pkg/util/util.go
@@ -231,7 +231,6 @@ func EncodeContainerDevices(cd ContainerDevices) string {
 	for _, val := range cd {
 		tmp += val.UUID + "," + val.Type + "," + strconv.Itoa(int(val.Usedmem)) + "," + strconv.Itoa(int(val.Usedcores)) + OneContainerMultiDeviceSplitSymbol
 	}
-	klog.Infof("Encoded container Devices: %s", tmp)
 	return tmp
 	//return strings.Join(cd, ",")
 }
@@ -254,7 +253,6 @@ func EncodePodSingleDevice(pd PodSingleDevice) string {
 		res = res + EncodeContainerDevices(ctrdevs)
 		res = res + OnePodMultiContainerSplitSymbol
 	}
-	klog.Infof("Encoded pod single devices %s", res)
 	return res
 }
 

From feaac9971ed76f37f44405cecfe420bf6e6f8750 Mon Sep 17 00:00:00 2001
From: haitwang-cloud <haitao_wht@outlook.com>
Date: Wed, 9 Apr 2025 14:09:14 +0800
Subject: [PATCH 5/5] feat: add error logging and improve device information
 collection in NVIDIA plugin

---
 logs.txt                                 |   1 +
 pkg/nvidia-plugin/pkg/cdi/cdi.go         |  10 +-
 pkg/nvidia-plugin/pkg/plugin/register.go | 132 ++++++++++++++++++-----
 pkg/nvidia-plugin/pkg/plugin/server.go   |  39 ++++---
 4 files changed, 141 insertions(+), 41 deletions(-)
 create mode 100644 logs.txt

diff --git a/logs.txt b/logs.txt
new file mode 100644
index 000000000..f072e0b0c
--- /dev/null
+++ b/logs.txt
@@ -0,0 +1 @@
+Error: failed to create containerd task: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: error during container init: error running hook #0: error running hook: exit status 1, stdout: , stderr: nvidia-container-cli.real: device error: GPU-3c955d4d-eea8-345a-5477-85f2c1d1ae7b-4: unknown device: unknown
\ No newline at end of file
diff --git a/pkg/nvidia-plugin/pkg/cdi/cdi.go b/pkg/nvidia-plugin/pkg/cdi/cdi.go
index b3227d437..789ebacf2 100644
--- a/pkg/nvidia-plugin/pkg/cdi/cdi.go
+++ b/pkg/nvidia-plugin/pkg/cdi/cdi.go
@@ -227,5 +227,13 @@ func (cdi *cdiHandler) getRootTransformer() transform.Transformer {
 // QualifiedName constructs a CDI qualified device name for the specified resources.
 // Note: This assumes that the specified id matches the device name returned by the naming strategy.
 func (cdi *cdiHandler) QualifiedName(class string, id string) string {
-	return cdiparser.QualifiedName(cdi.vendor, class, id)
+	// Without an ID there is nothing to qualify; log the problem and return
+	// an empty name.
+	if len(id) == 0 {
+		klog.Error("Empty device ID received")
+		return ""
+	}
+	qualified := cdiparser.QualifiedName(cdi.vendor, class, id)
+	klog.Infof("CDI name generated - Vendor: %s, Class: %s, ID: %s -> %s", cdi.vendor, class, id, qualified)
+	return qualified
 }
diff --git a/pkg/nvidia-plugin/pkg/plugin/register.go b/pkg/nvidia-plugin/pkg/plugin/register.go
index 10ce458f0..cd59a6a24 100644
--- a/pkg/nvidia-plugin/pkg/plugin/register.go
+++ b/pkg/nvidia-plugin/pkg/plugin/register.go
@@ -108,56 +108,106 @@ func parseNvidiaNumaInfo(idx int, nvidiaTopoStr string) (int, error) {
 }
 
 func (plugin *NvidiaDevicePlugin) getAPIDevices() *[]*util.DeviceInfo {
+	// Fetch the devices currently managed by this plugin
 	devs := plugin.Devices()
-	klog.V(5).InfoS("getAPIDevices", "devices", devs)
-	nvml.Init()
+	klog.InfoS("Starting to collect GPU device information", "deviceCount", len(devs))
+
+	// Initialize NVML library
+	if ret := nvml.Init(); ret != nvml.SUCCESS {
+		errMsg := nvml.ErrorString(ret)
+		klog.ErrorS(fmt.Errorf("%s", errMsg), "Failed to initialize NVML")
+		return &[]*util.DeviceInfo{}
+	}
+	defer nvml.Shutdown()
+
 	res := make([]*util.DeviceInfo, 0, len(devs))
+	var errorCount int
+
+	// Process each GPU device
 	for UUID := range devs {
+		// Get device handle by UUID
 		ndev, ret := nvml.DeviceGetHandleByUUID(UUID)
 		if ret != nvml.SUCCESS {
-			klog.Errorln("nvml new device by index error uuid=", UUID, "err=", ret)
-			panic(0)
+			errMsg := nvml.ErrorString(ret)
+			klog.ErrorS(fmt.Errorf("%s", errMsg), "Failed to get device handle",
+				"uuid", UUID, "errorCode", ret)
+			errorCount++
+			continue
 		}
+
+		// Get device index
 		idx, ret := ndev.GetIndex()
 		if ret != nvml.SUCCESS {
-			klog.Errorln("nvml get index error ret=", ret)
-			panic(0)
+			errMsg := nvml.ErrorString(ret)
+			klog.ErrorS(fmt.Errorf("%s", errMsg), "Failed to get device index",
+				"uuid", UUID, "errorCode", ret)
+			errorCount++
+			continue
 		}
-		memoryTotal := 0
+
+		// Get memory information
 		memory, ret := ndev.GetMemoryInfo()
-		if ret == nvml.SUCCESS {
-			memoryTotal = int(memory.Total)
-		} else {
-			klog.Error("nvml get memory error ret=", ret)
-			panic(0)
-		}
-		Model, ret := ndev.GetName()
 		if ret != nvml.SUCCESS {
-			klog.Error("nvml get name error ret=", ret)
-			panic(0)
+			errMsg := nvml.ErrorString(ret)
+			klog.ErrorS(fmt.Errorf("%s", errMsg), "Failed to get memory info",
+				"uuid", UUID, "index", idx)
+			errorCount++
+			continue
 		}
+		memoryTotal := int(memory.Total)
 
+		// Calculate registered memory with scaling factor
 		registeredmem := int32(memoryTotal / 1024 / 1024)
 		if plugin.schedulerConfig.DeviceMemoryScaling != 1 {
+			original := registeredmem
 			registeredmem = int32(float64(registeredmem) * plugin.schedulerConfig.DeviceMemoryScaling)
+			klog.V(4).InfoS("Applied memory scaling",
+				"originalMB", original,
+				"scaledMB", registeredmem,
+				"scalingFactor", plugin.schedulerConfig.DeviceMemoryScaling)
+		}
+
+		// Get device model name
+		Model, ret := ndev.GetName()
+		if ret != nvml.SUCCESS {
+			errMsg := nvml.ErrorString(ret)
+			klog.ErrorS(fmt.Errorf("%s", errMsg), "Failed to get device name",
+				"uuid", UUID, "index", idx)
+			errorCount++
+			continue
 		}
+
+		// Check device health status
 		health := true
 		for _, val := range devs {
 			if strings.Compare(val.ID, UUID) == 0 {
-				// when NVIDIA-Tesla P4, the device info is : ID:GPU-e290caca-2f0c-9582-acab-67a142b61ffa,Health:Healthy,Topology:nil,
-				// it is more reasonable to think of healthy as case-insensitive
-				if strings.EqualFold(val.Health, "healthy") {
-					health = true
-				} else {
-					health = false
+				health = strings.EqualFold(val.Health, "healthy")
+				if !health {
+					klog.Warningf("Device is not healthy: uuid=%s, index=%d, healthStatus=%s",
+						UUID, idx,
+						val.Health)
 				}
 				break
 			}
 		}
+
+		// Get NUMA affinity information
 		numa, err := plugin.getNumaInformation(idx)
 		if err != nil {
-			klog.ErrorS(err, "failed to get numa information", "idx", idx)
+			klog.ErrorS(err, "Failed to get NUMA information",
+				"uuid", UUID, "index", idx)
 		}
+
+		// Log successful device collection
+		klog.InfoS("Successfully collected GPU device info",
+			"uuid", UUID,
+			"index", idx,
+			"model", Model,
+			"memoryMB", registeredmem,
+			"numaNode", numa,
+			"healthStatus", health)
+
+		// Add device info to result
 		res = append(res, &util.DeviceInfo{
 			ID:      UUID,
 			Index:   uint(idx),
@@ -170,26 +220,54 @@ func (plugin *NvidiaDevicePlugin) getAPIDevices() *[]*util.DeviceInfo {
 			Health:  health,
 		})
 	}
+
+	// Log summary of device collection
+	if errorCount > 0 {
+		klog.Warningf("Failed to collect some GPU device information: errorCount=%d, totalDevices=%d, successfulDevices=%d",
+			errorCount,
+			len(devs),
+			len(res))
+	} else {
+		klog.InfoS("Successfully collected all GPU device information",
+			"deviceCount", len(res))
+	}
+
 	return &res
 }
 
 func (plugin *NvidiaDevicePlugin) RegistrInAnnotation() error {
 	devices := plugin.getAPIDevices()
-	klog.InfoS("start working on the devices", "devices", devices)
+	klog.Infof("Starting to register %d devices in node annotation", len(*devices))
+
+	if len(*devices) == 0 {
+		klog.Warning("No GPU devices found to register")
+		return nil
+	}
+	for i, dev := range *devices {
+		klog.InfoS("Device details",
+			"index", i,
+			"uuid", dev.ID,
+			"type", dev.Type,
+			"memoryMB", dev.Devmem,
+			"numaNode", dev.Numa,
+			"health", dev.Health)
+	}
 	annos := make(map[string]string)
 	node, err := util.GetNode(util.NodeName)
 	if err != nil {
 		klog.Errorln("get node error", err.Error())
 		return err
 	}
-	encodeddevices := util.EncodeNodeDevices(*devices)
+	encodedDevices := util.EncodeNodeDevices(*devices)
 	annos[nvidia.HandshakeAnnos] = "Reported " + time.Now().String()
-	annos[nvidia.RegisterAnnos] = encodeddevices
+	annos[nvidia.RegisterAnnos] = encodedDevices
 	err = util.PatchNodeAnnotations(node, annos)
-
 	if err != nil {
 		klog.Errorln("patch node error", err.Error())
 	}
+	if err == nil {
+		klog.InfoS("Successfully registered devices in node annotation", "deviceCount", len(*devices), "nodeName", util.NodeName)
+	}
 	return err
 }
 
diff --git a/pkg/nvidia-plugin/pkg/plugin/server.go b/pkg/nvidia-plugin/pkg/plugin/server.go
index 94b3cfd14..5e7acc334 100644
--- a/pkg/nvidia-plugin/pkg/plugin/server.go
+++ b/pkg/nvidia-plugin/pkg/plugin/server.go
@@ -358,7 +358,13 @@ func (plugin *NvidiaDevicePlugin) GetDevicePluginOptions(context.Context, *plugi
 
 // ListAndWatch lists devices and update that list according to the health status
 func (plugin *NvidiaDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error {
-	if err := s.Send(&pluginapi.ListAndWatchResponse{Devices: plugin.apiDevices()}); err != nil {
+	devices := plugin.apiDevices()
+	klog.Infof("Listing devices for resource %s, count: %d", plugin.rm.Resource(), len(devices))
+
+	for _, dev := range devices {
+		klog.V(5).Infof("Device ID: %s, Health: %s", dev.ID, dev.Health)
+	}
+	if err := s.Send(&pluginapi.ListAndWatchResponse{Devices: devices}); err != nil {
 		return err
 	}
 
@@ -397,14 +403,12 @@ func (plugin *NvidiaDevicePlugin) GetPreferredAllocation(ctx context.Context, r
 
 // Allocate which return list of devices.
 func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
-	// 日志：函数开始
 	klog.InfoS("Allocate function started", "request", reqs)
 
 	responses := pluginapi.AllocateResponse{}
 	nodeName := os.Getenv(util.NodeNameEnvName)
 	klog.InfoS("Processing allocate request on node", "nodeName", nodeName)
 
-	// 获取当前待处理的 Pod 信息
 	current, err := util.GetPendingPod(ctx, nodeName)
 	if err != nil {
 		klog.ErrorS(err, "Failed to get pending pod", "nodeName", nodeName)
@@ -416,11 +420,6 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.
 		containerIndex := idx + 1
 		klog.InfoS("Processing container request", "containerIndex", containerIndex, "totalContainers", len(reqs.ContainerRequests), "namespace", current.Namespace, "podName", current.Name)
 
-		//if err := plugin.rm.ValidateRequest(req.DevicesIDs); err != nil {
-		//	klog.ErrorS(err, "Invalid allocation request", "resource", plugin.rm.Resource(), "devicesIDs", req.DevicesIDs, "namespace", current.Namespace, "podName", current.Name)
-		//	return nil, fmt.Errorf("invalid allocation request for %q: %w", plugin.rm.Resource(), err)
-		//}
-
 		currentCtr, devreq, err := GetNextDeviceRequest(nvidia.NvidiaGPUDevice, *current)
 		if err != nil {
 			klog.ErrorS(err, "Failed to get next device request", "nodeName", nodeName, "namespace", current.Namespace, "podName", current.Name)
@@ -490,7 +489,6 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.
 				},
 			)
 
-			// 检查 CUDA_DISABLE_CONTROL 环境变量是否存在
 			found := false
 			for _, val := range currentCtr.Env {
 				if strings.Compare(val.Name, "CUDA_DISABLE_CONTROL") == 0 {
@@ -509,7 +507,6 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.
 				})
 			}
 
-			// 检查许可证文件是否存在
 			_, err = os.Stat(fmt.Sprintf("%s/vgpu/license", hostHookPath))
 			if err == nil {
 				response.Mounts = append(response.Mounts, &pluginapi.Mount{
@@ -525,15 +522,12 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.
 			}
 		}
 
-		// 将容器响应追加到最终响应中
 		klog.InfoS("Appending container response", "containerIndex", containerIndex, "totalContainers", len(reqs.ContainerRequests), "namespace", current.Namespace, "podName", current.Name)
 		responses.ContainerResponses = append(responses.ContainerResponses, response)
 	}
 
-	// 日志：最终分配响应
 	klog.InfoS("Final allocate response generated", "response", responses)
 
-	// 标记 Pod 分配成功
 	device.PodAllocationTrySuccess(nodeName, nvidia.NvidiaGPUDevice, NodeLockNvidia, current)
 	klog.InfoS("Allocate function completed successfully", "response", responses)
 
@@ -541,7 +535,15 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.
 }
 
 func (plugin *NvidiaDevicePlugin) getAllocateResponse(requestIds []string) (*pluginapi.ContainerAllocateResponse, error) {
+	klog.InfoS("Start processing allocation response", "requestIds", requestIds)
+	if plugin.cdiHandler == nil {
+		return nil, fmt.Errorf("CDI handler not initialized")
+	}
 	deviceIDs := plugin.deviceIDsFromAnnotatedDeviceIDs(requestIds)
+	klog.InfoS("Request ID conversion result",
+		"requestIds", requestIds,
+		"deviceIDs", deviceIDs,
+		"strategy", *plugin.config.Flags.Plugin.DeviceIDStrategy)
 
 	// Create an empty response that will be updated as required below.
 	response := &pluginapi.ContainerAllocateResponse{
@@ -549,10 +551,17 @@ func (plugin *NvidiaDevicePlugin) getAllocateResponse(requestIds []string) (*plu
 	}
 	if plugin.deviceListStrategies.AnyCDIEnabled() {
 		responseID := uuid.New().String()
+		klog.InfoS("Processing CDI devices",
+			"responseID", responseID,
+			"deviceCount", len(deviceIDs))
 		if err := plugin.updateResponseForCDI(response, responseID, deviceIDs...); err != nil {
 			return nil, fmt.Errorf("failed to get allocate response for CDI: %v", err)
 		}
+		klog.InfoS("CDI response updated successfully",
+			"annotations", response.Annotations,
+			"cdiDevices", response.CDIDevices)
 	}
+
 	// The following modifications are only made if at least one non-CDI device
 	// list strategy is selected.
 	if plugin.deviceListStrategies.AllCDIEnabled() {
@@ -575,6 +584,10 @@ func (plugin *NvidiaDevicePlugin) getAllocateResponse(requestIds []string) (*plu
 	if *plugin.config.Flags.MOFEDEnabled {
 		response.Envs["NVIDIA_MOFED"] = "enabled"
 	}
+	klog.InfoS("Successfully generated allocation response",
+		"envs", response.Envs,
+		"devices", response.Devices,
+		"mounts", response.Mounts)
 	return response, nil
 }