Commit ddee22f

wangxiyuan authored and xuebwang-amd committed
[Misc] rename torch_dtype to dtype (vllm-project#26695)
Signed-off-by: wangxiyuan <[email protected]>
Signed-off-by: xuebwang-amd <[email protected]>
1 parent 31f3bb5 commit ddee22f
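
The rename follows the transformers library, which accepts `dtype` in place of the older `torch_dtype` keyword (hence the `transformers >= 4.56.0` floor raised below). A minimal before/after sketch, reusing the Qwen model from the auto_round.md hunk:

```python
from transformers import AutoModelForCausalLM

# Old spelling, removed throughout vLLM by this commit:
# model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B", torch_dtype="auto")

# New spelling, matching recent transformers releases:
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B", dtype="auto")
```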

File tree

30 files changed, +52 -55 lines changed


benchmarks/kernels/benchmark_moe.py

Lines changed: 1 addition & 1 deletion
@@ -631,7 +631,7 @@ def main(args: argparse.Namespace):
     else:
         ensure_divisibility(intermediate_size, args.tp_size, "intermediate_size")
         shard_intermediate_size = 2 * intermediate_size // args.tp_size
-    dtype = torch.float16 if current_platform.is_rocm() else config.torch_dtype
+    dtype = torch.float16 if current_platform.is_rocm() else config.dtype
     use_fp8_w8a8 = args.dtype == "fp8_w8a8"
     use_int8_w8a16 = args.dtype == "int8_w8a16"
     block_quant_shape = get_weight_block_size_safety(config)
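
The benchmark reads the model dtype from a transformers config object, now under its new attribute name. A minimal sketch of that lookup, with an illustrative model name (on ROCm the benchmark overrides the value with float16, as the hunk above shows):

```python
import torch
from transformers import AutoConfig

config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
# Recent transformers expose the checkpoint dtype as `config.dtype`
# (formerly `config.torch_dtype`); it may be None for some configs.
dtype = config.dtype or torch.float16
print(dtype)
```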

benchmarks/kernels/benchmark_moe_permute_unpermute.py

Lines changed: 1 addition & 1 deletion
@@ -344,7 +344,7 @@ def main(args: argparse.Namespace):
     topk = config.num_experts_per_tok

     hidden_size = config.hidden_size
-    dtype = torch.float16 if current_platform.is_rocm() else config.torch_dtype
+    dtype = torch.float16 if current_platform.is_rocm() else config.dtype
     use_fp8_w8a8 = args.dtype == "fp8_w8a8"
     use_int8_w8a16 = args.dtype == "int8_w8a16"
     use_customized_permute = args.use_customized_permute

docs/features/quantization/auto_round.md

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from auto_round import AutoRound

 model_name = "Qwen/Qwen3-0.6B"
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(model_name, dtype="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_name)

 bits, group_size, sym = 4, 128, True

docs/features/quantization/fp8.md

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     device_map="auto",
-    torch_dtype="auto",
+    dtype="auto",
 )
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 ```

docs/features/quantization/int4.md

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     device_map="auto",
-    torch_dtype="auto",
+    dtype="auto",
 )
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 ```

docs/features/quantization/int8.md

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     device_map="auto",
-    torch_dtype="auto",
+    dtype="auto",
 )
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 ```

docs/features/quantization/quantized_kvcache.md

Lines changed: 1 addition & 1 deletion
@@ -82,7 +82,7 @@ Here's a complete example using `meta-llama/Llama-3.1-8B-Instruct` (most models

 # Select model and load it
 MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto", torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto", dtype="auto")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

 # Select calibration dataset

docs/features/quantization/quark.md

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ to fetch model and tokenizer.
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     device_map="auto",
-    torch_dtype="auto",
+    dtype="auto",
 )
 model.eval()

docs/features/quantization/torchao.md

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ You can quantize your own huggingface model with torchao, e.g. [transformers](ht
 quantization_config = TorchAoConfig(Int8WeightOnlyConfig())
 quantized_model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype="auto",
+    dtype="auto",
     device_map="auto",
     quantization_config=quantization_config
 )

requirements/common.txt

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@ requests >= 2.26.0
 tqdm
 blake3
 py-cpuinfo
-transformers >= 4.55.2
+transformers >= 4.56.0
 tokenizers >= 0.21.1 # Required for fast incremental detokenization.
 protobuf # Required by LlamaTokenizer.
 fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
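
Raising the transformers floor to 4.56.0 is what lets the codebase drop the old spelling outright. Code that must straddle releases on both sides of the rename could instead guard the attribute access; a hedged sketch, not part of this commit:

```python
def config_dtype(config):
    """Return a config's model dtype under either the new (`dtype`)
    or the old (`torch_dtype`) attribute name."""
    return getattr(config, "dtype", None) or getattr(config, "torch_dtype", None)
```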
