@@ -105,14 +105,34 @@ var (
105
105
)
106
106
// ctrDeviceMemorydesc describes the legacy per-container device memory
// metric. In addition to the pod/container/device identity labels it carries
// the context, module, data and offset sizes as label values; those four
// labels are scheduled for deprecation in 2.8.0 (see the help text) in favor
// of dedicated metrics.
ctrDeviceMemorydesc = prometheus.NewDesc(
	"Device_memory_desc_of_container",
	"Container device memory description. The labels context, module, data and offset will be deprecated in 2.8.0.",
	[]string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid", "context", "module", "data", "offset"}, nil,
)
111
111
// ctrDeviceUtilizationdesc describes the per-container device utilization
// metric, identified by pod namespace, pod name, container name, virtual
// device id and device UUID.
ctrDeviceUtilizationdesc = prometheus.NewDesc(
	"Device_utilization_desc_of_container",
	"Container device utilization description",
	[]string{
		"podnamespace",
		"podname",
		"ctrname",
		"vdeviceid",
		"deviceuuid",
	},
	nil, // no constant labels
)
116
+ ctrDeviceMemoryContext = prometheus .NewDesc (
117
+ "Device_memory_context_of_container" ,
118
+ "Container device memory context description" ,
119
+ []string {"podnamespace" , "podname" , "ctrname" , "vdeviceid" , "deviceuuid" }, nil ,
120
+ )
121
+ ctrDeviceMemoryModule = prometheus .NewDesc (
122
+ "Device_memory_module_of_container" ,
123
+ "Container device memory module description" ,
124
+ []string {"podnamespace" , "podname" , "ctrname" , "vdeviceid" , "deviceuuid" }, nil ,
125
+ )
126
+ ctrDeviceMemoryData = prometheus .NewDesc (
127
+ "Device_memory_data_of_container" ,
128
+ "Container device memory data description" ,
129
+ []string {"podnamespace" , "podname" , "ctrname" , "vdeviceid" , "deviceuuid" }, nil ,
130
+ )
131
+ ctrDeviceMemoryOffset = prometheus .NewDesc (
132
+ "Device_memory_data_of_container" ,
133
+ "Container device memory data description" ,
134
+ []string {"podnamespace" , "podname" , "ctrname" , "vdeviceid" , "deviceuuid" }, nil ,
135
+ )
116
136
ctrDeviceLastKernelDesc = prometheus .NewDesc (
117
137
"Device_last_kernel_of_container" ,
118
138
"Container device last kernel description" ,
@@ -400,11 +420,31 @@ func (cc ClusterManagerCollector) collectContainerMetrics(ch chan<- prometheus.M
400
420
401
421
// Send memory-related metrics with additional labels
402
422
memoryLabels := append (labels , fmt .Sprint (memoryContextSize ), fmt .Sprint (memoryModuleSize ), fmt .Sprint (memoryBufferSize ), fmt .Sprint (memoryOffset ))
403
- if err := sendMetric (ch , ctrDeviceMemorydesc , prometheus .CounterValue , float64 (memoryTotal ), memoryLabels ... ); err != nil {
423
+ if err := sendMetric (ch , ctrDeviceMemorydesc , prometheus .GaugeValue , float64 (memoryTotal ), memoryLabels ... ); err != nil {
404
424
klog .Errorf ("Failed to send memory-related metrics for device %d in Pod %s/%s, Container %s: %v" , i , pod .Namespace , pod .Name , ctr .Name , err )
405
425
return err
406
426
}
407
427
428
+ if err := sendMetric (ch , ctrDeviceMemoryContext , prometheus .GaugeValue , float64 (memoryContextSize ), labels ... ); err != nil {
429
+ klog .Errorf ("Failed to send memory context metrics for device %d in Pod %s/%s, Container %s: %v" , i , pod .Namespace , pod .Name , ctr .Name , err )
430
+ return err
431
+ }
432
+
433
+ if err := sendMetric (ch , ctrDeviceMemoryModule , prometheus .GaugeValue , float64 (memoryModuleSize ), labels ... ); err != nil {
434
+ klog .Errorf ("Failed to send memory module metrics for device %d in Pod %s/%s, Container %s: %v" , i , pod .Namespace , pod .Name , ctr .Name , err )
435
+ return err
436
+ }
437
+
438
+ if err := sendMetric (ch , ctrDeviceMemoryData , prometheus .GaugeValue , float64 (memoryBufferSize ), labels ... ); err != nil {
439
+ klog .Errorf ("Failed to send memory buffer metrics for device %d in Pod %s/%s, Container %s: %v" , i , pod .Namespace , pod .Name , ctr .Name , err )
440
+ return err
441
+ }
442
+
443
+ if err := sendMetric (ch , ctrDeviceMemoryOffset , prometheus .GaugeValue , float64 (memoryOffset ), labels ... ); err != nil {
444
+ klog .Errorf ("Failed to send memory offset metrics for device %d in Pod %s/%s, Container %s: %v" , i , pod .Namespace , pod .Name , ctr .Name , err )
445
+ return err
446
+ }
447
+
408
448
if err := sendMetric (ch , ctrDeviceUtilizationdesc , prometheus .GaugeValue , float64 (smUtil ), labels ... ); err != nil {
409
449
klog .Errorf ("Failed to send SM utilization metric for device %d in Pod %s/%s, Container %s: %v" , i , pod .Namespace , pod .Name , ctr .Name , err )
410
450
return err
0 commit comments