-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Description
Describe the bug
I would like to use 'ec2 describe-instance-types' to query for Nvidia GPUs with certain amounts of VRAM memory to meet the minimum memory need for some applications. Unfortunately, for some of the newer GPUs (e.g. L40S, L4) the value reported in .MemoryInfo.SizeInMiB is wrong.
Regression Issue
- Select this option if this issue appears to be a regression.
Expected Behavior
The L40S GPU has about 48 GB of VRAM, so for 4 of them I expect a total of 183105 MiB:
"GpuInfo": {
"Gpus": [
{
"Name": "L40S",
"Manufacturer": "NVIDIA",
"Count": 4,
"MemoryInfo": {
"SizeInMiB": 45776
}
}
],
"TotalGpuMemoryInMiB": 183105
},
Current Behavior
The current output is wrong: 4 L40S GPUs do not have 732420 MiB of memory in total.
"GpuInfo": {
"Gpus": [
{
"Name": "L40S",
"Manufacturer": "NVIDIA",
"Count": 4,
"MemoryInfo": {
"SizeInMiB": 183105
}
}
],
"TotalGpuMemoryInMiB": 732420
},
Reproduction Steps
I would like to use 'ec2 describe-instance-types' to query for Nvidia GPUs with certain amounts of VRAM memory to meet the minimum need for some applications. I use:
instance_types=$(aws ec2 describe-instance-types \
--region ${AWS_REGION} \
--query "${query}" \
--output json)
and the query is:
InstanceTypes[?to_number(VCpuInfo.DefaultVCpus) >= `1` && to_number(MemoryInfo.SizeInMiB) >= `1024` && (to_number(GpuInfo.Gpus[?Manufacturer=='NVIDIA'].Count | [0]) >= `1`) && (to_number(GpuInfo.Gpus[?Manufacturer=='NVIDIA'].MemoryInfo.SizeInMiB | [0]) >= `1024`)]
This gives me a long list of options; see the attached JSON file:
instance_types.json
Then I run this search on instance_types.json:
grep -A 12 GpuInfo instance_types.json
and it will show this:
"GpuInfo": {
"Gpus": [
{
"Name": "L4",
"Manufacturer": "NVIDIA",
"Count": 4,
"MemoryInfo": {
"SizeInMiB": 91553
}
}
],
"TotalGpuMemoryInMiB": 366212
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "A10G",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 24576
}
}
],
"TotalGpuMemoryInMiB": 24576
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L40S",
"Manufacturer": "NVIDIA",
"Count": 8,
"MemoryInfo": {
"SizeInMiB": 366211
}
}
],
"TotalGpuMemoryInMiB": 2929688
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "M60",
"Manufacturer": "NVIDIA",
"Count": 4,
"MemoryInfo": {
"SizeInMiB": 8192
}
}
],
"TotalGpuMemoryInMiB": 32768
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "A10G",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 24576
}
}
],
"TotalGpuMemoryInMiB": 24576
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L40S",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 45776
}
}
],
"TotalGpuMemoryInMiB": 45776
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "M60",
"Manufacturer": "NVIDIA",
"Count": 2,
"MemoryInfo": {
"SizeInMiB": 8192
}
}
],
"TotalGpuMemoryInMiB": 16384
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "A10G",
"Manufacturer": "NVIDIA",
"Count": 4,
"MemoryInfo": {
"SizeInMiB": 24576
}
}
],
"TotalGpuMemoryInMiB": 98304
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L40S",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 45776
}
}
],
"TotalGpuMemoryInMiB": 45776
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "K80",
"Manufacturer": "NVIDIA",
"Count": 8,
"MemoryInfo": {
"SizeInMiB": 12288
}
}
],
"TotalGpuMemoryInMiB": 98304
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L4",
"Manufacturer": "NVIDIA",
"Count": 8,
"MemoryInfo": {
"SizeInMiB": 183105
}
}
],
"TotalGpuMemoryInMiB": 1464840
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L4",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 22888
}
}
],
"TotalGpuMemoryInMiB": 22888
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "T4",
"Manufacturer": "NVIDIA",
"Count": 4,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 65536
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "T4",
"Manufacturer": "NVIDIA",
"Count": 8,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 131072
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "T4g",
"Manufacturer": "NVIDIA",
"Count": 2,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 32768
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "A10G",
"Manufacturer": "NVIDIA",
"Count": 4,
"MemoryInfo": {
"SizeInMiB": 24576
}
}
],
"TotalGpuMemoryInMiB": 98304
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "V100",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 16384
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "M60",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 8192
}
}
],
"TotalGpuMemoryInMiB": 8192
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L40S",
"Manufacturer": "NVIDIA",
"Count": 4,
"MemoryInfo": {
"SizeInMiB": 183105
}
}
],
"TotalGpuMemoryInMiB": 732420
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "T4g",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 16384
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "A100",
"Manufacturer": "NVIDIA",
"Count": 8,
"MemoryInfo": {
"SizeInMiB": 40960
}
}
],
"TotalGpuMemoryInMiB": 327680
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "H100",
"Manufacturer": "NVIDIA",
"Count": 8,
"MemoryInfo": {
"SizeInMiB": 81920
}
}
],
"TotalGpuMemoryInMiB": 655360
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "T4g",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 16384
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "T4g",
"Manufacturer": "NVIDIA",
"Count": 2,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 32768
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "A10G",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 24576
}
}
],
"TotalGpuMemoryInMiB": 24576
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "T4",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 16384
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L4",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 22888
}
}
],
"TotalGpuMemoryInMiB": 22888
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "A10G",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 24576
}
}
],
"TotalGpuMemoryInMiB": 24576
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L4",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 22888
}
}
],
"TotalGpuMemoryInMiB": 22888
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L4",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 22888
}
}
],
"TotalGpuMemoryInMiB": 22888
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "V100",
"Manufacturer": "NVIDIA",
"Count": 8,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 131072
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L4",
"Manufacturer": "NVIDIA",
"Count": 4,
"MemoryInfo": {
"SizeInMiB": 91553
}
}
],
"TotalGpuMemoryInMiB": 366212
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L40S",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 45776
}
}
],
"TotalGpuMemoryInMiB": 45776
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "T4",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 16384
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L40S",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 45776
}
}
],
"TotalGpuMemoryInMiB": 45776
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L4",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 22888
}
}
],
"TotalGpuMemoryInMiB": 22888
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L40S",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 45776
}
}
],
"TotalGpuMemoryInMiB": 45776
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L4",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 22888
}
}
],
"TotalGpuMemoryInMiB": 22888
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "K80",
"Manufacturer": "NVIDIA",
"Count": 16,
"MemoryInfo": {
"SizeInMiB": 12288
}
}
],
"TotalGpuMemoryInMiB": 196608
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "V100",
"Manufacturer": "NVIDIA",
"Count": 8,
"MemoryInfo": {
"SizeInMiB": 32768
}
}
],
"TotalGpuMemoryInMiB": 262144
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "M60",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 8192
}
}
],
"TotalGpuMemoryInMiB": 8192
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "V100",
"Manufacturer": "NVIDIA",
"Count": 4,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 65536
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "A10G",
"Manufacturer": "NVIDIA",
"Count": 8,
"MemoryInfo": {
"SizeInMiB": 24576
}
}
],
"TotalGpuMemoryInMiB": 196608
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L4",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 22888
}
}
],
"TotalGpuMemoryInMiB": 22888
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "A10G",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 24576
}
}
],
"TotalGpuMemoryInMiB": 24576
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "K80",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 12288
}
}
],
"TotalGpuMemoryInMiB": 12288
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "T4",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 16384
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "T4",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 16384
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "T4",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 16384
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "T4g",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 16384
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "L40S",
"Manufacturer": "NVIDIA",
"Count": 4,
"MemoryInfo": {
"SizeInMiB": 183105
}
}
],
"TotalGpuMemoryInMiB": 732420
},
--
"GpuInfo": {
"Gpus": [
{
"Name": "T4g",
"Manufacturer": "NVIDIA",
"Count": 1,
"MemoryInfo": {
"SizeInMiB": 16384
}
}
],
"TotalGpuMemoryInMiB": 16384
},
Possible Solution
No response
Additional Information/Context
No response
CLI version used
aws-cli/2.17.47 Python/3.11.9 Linux/5.15.153.1-microsoft-standard-WSL2 exe/x86_64.debian.11
Environment details (OS name and version, etc.)
Debian 11 / WSL Windows 11 / Kernel 5.15