Skip to content

Commit 8095307

Browse files
authored
[Refactor]: Remove tokenizer when building engine (#3978)
* do not serialize the tokenizer * remove the tokenizer when building the mp engine * remove the tokenizer argument when building the engine * use the HF tokenizer * use the raw tokenizer to align with the original behavior
1 parent 7161a4a commit 8095307

File tree

15 files changed

+25
-97
lines changed

15 files changed

+25
-97
lines changed

benchmark/profile_generation.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
from lmdeploy.cli.utils import ArgumentHelper, DefaultsAndTypesHelpFormatter
1818
from lmdeploy.messages import GenerationConfig, PytorchEngineConfig, TurbomindEngineConfig
19-
from lmdeploy.tokenizer import Tokenizer
2019
from lmdeploy.utils import get_logger
2120

2221
get_logger('lmdeploy').setLevel('WARNING')
@@ -121,13 +120,12 @@ def profile_throughput(model_path: str, concurrency: int, input_seqlen: int,
121120
f'n_prompt_token: {input_seqlen}, '
122121
f'n_completion_token: {output_seqlen}, '
123122
f'test_round: {test_round}, warmup_round: {warmup_round}')
124-
tokenizer = Tokenizer(model_path)
125123
if isinstance(engine_config, TurbomindEngineConfig):
126124
from lmdeploy.turbomind import TurboMind
127-
tm_model = TurboMind.from_pretrained(model_path, tokenizer=tokenizer, engine_config=engine_config)
125+
tm_model = TurboMind.from_pretrained(model_path, engine_config=engine_config)
128126
elif isinstance(engine_config, PytorchEngineConfig):
129127
from lmdeploy.pytorch.engine import Engine
130-
tm_model = Engine(model_path, tokenizer=tokenizer, engine_config=engine_config)
128+
tm_model = Engine(model_path, engine_config=engine_config)
131129

132130
event_loop = asyncio.new_event_loop()
133131
asyncio.set_event_loop(event_loop)

benchmark/profile_throughput.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,11 @@ def __init__(self, model_path: str, engine_config: Union[PytorchEngineConfig, Tu
138138
self.tokenizer = Tokenizer(model_path)
139139
if isinstance(engine_config, TurbomindEngineConfig):
140140
from lmdeploy.turbomind import TurboMind
141-
tm_model = TurboMind.from_pretrained(model_path, tokenizer=self.tokenizer, engine_config=engine_config)
141+
tm_model = TurboMind.from_pretrained(model_path, engine_config=engine_config)
142142
self.backend = 'turbomind'
143143
elif isinstance(engine_config, PytorchEngineConfig):
144144
from lmdeploy.pytorch.engine import Engine as PytorchEngine
145-
tm_model = PytorchEngine.from_pretrained(model_path, tokenizer=self.tokenizer, engine_config=engine_config)
145+
tm_model = PytorchEngine.from_pretrained(model_path, engine_config=engine_config)
146146
self.backend = 'pytorch'
147147

148148
self.tm_model = tm_model

lmdeploy/pytorch/engine/engine.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -316,21 +316,18 @@ class Engine(EngineBase):
316316
317317
Args:
318318
model_path (str): The hugging face model path.
319-
tokenizer (lmdeploy.Tokenizer): an instance of lmdeploy.Tokenizer
320319
engine_config (PytorchEngineConfig): The config of the Engine.
321320
trust_remote_code (bool): Trust remote code.
322321
"""
323322

324323
def __init__(self,
325324
model_path: str,
326-
tokenizer: object,
327325
engine_config: PytorchEngineConfig = None,
328326
trust_remote_code: bool = True) -> None:
329327
# make sure engine config exist
330328
engine_config = _update_engine_config(engine_config)
331329

332330
# dist args
333-
self.tokenizer = tokenizer
334331
self.tp = engine_config.tp
335332
self.dp = engine_config.dp
336333
self.dp_rank = engine_config.dp_rank
@@ -358,15 +355,11 @@ def __init__(self,
358355
misc_config = _build_misc_config(engine_config)
359356

360357
# build model agent
361-
raw_tokenizer = None
362-
if tokenizer is not None:
363-
raw_tokenizer = tokenizer.model.model
364358
self.executor = build_executor(model_path,
365359
cache_config=cache_config,
366360
backend_config=backend_config,
367361
dist_config=dist_config,
368362
misc_config=misc_config,
369-
tokenizer=raw_tokenizer,
370363
adapters=adapters,
371364
device_type=engine_config.device_type,
372365
distributed_executor_backend=engine_config.distributed_executor_backend,
@@ -406,7 +399,6 @@ def __init__(self,
406399
@classmethod
407400
def from_pretrained(cls,
408401
pretrained_model_name_or_path: str,
409-
tokenizer: object,
410402
engine_config: PytorchEngineConfig = None,
411403
trust_remote_code: bool = True,
412404
**kwargs):
@@ -423,7 +415,6 @@ def from_pretrained(cls,
423415
on huggingface.co, such as "InternLM/internlm-chat-7b",
424416
"Qwen/Qwen-7B-Chat ", "baichuan-inc/Baichuan2-7B-Chat"
425417
and so on.
426-
tokenizer (lmdeploy.Tokenizer): an instance of lmdeploy.Tokenizer
427418
engine_config (PytorchEngineConfig): Pytorch engine config.
428419
trust_remote_code (bool): Trust remote code
429420
"""
@@ -432,13 +423,11 @@ def from_pretrained(cls,
432423
backend = engine_config.mp_engine_backend
433424
return build_mp_engine(backend=backend,
434425
model_path=pretrained_model_name_or_path,
435-
tokenizer=tokenizer,
436426
engine_config=engine_config,
437427
trust_remote_code=trust_remote_code)
438428
if len(kwargs) > 0:
439429
logger.debug(f'Get unexpected kwargs: {kwargs}')
440430
return cls(model_path=pretrained_model_name_or_path,
441-
tokenizer=tokenizer,
442431
engine_config=engine_config,
443432
trust_remote_code=trust_remote_code)
444433

lmdeploy/pytorch/engine/executor/__init__.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Copyright (c) OpenMMLab. All rights reserved.
22
from logging import Logger
3-
from typing import Any, Dict
3+
from typing import Dict
44

55
from lmdeploy.pytorch import envs
66
from lmdeploy.pytorch.config import BackendConfig, CacheConfig, DistConfig, MiscConfig, ModelConfig
@@ -58,7 +58,6 @@ def build_executor(model_path: str,
5858
backend_config: BackendConfig,
5959
dist_config: DistConfig,
6060
misc_config: MiscConfig,
61-
tokenizer: Any,
6261
adapters: Dict[str, str] = None,
6362
device_type: str = 'cuda',
6463
distributed_executor_backend: str = None,
@@ -98,7 +97,6 @@ def build_executor(model_path: str,
9897
cache_config=cache_config,
9998
backend_config=backend_config,
10099
misc_config=misc_config,
101-
tokenizer=tokenizer,
102100
adapters=adapters,
103101
device_type=device_type,
104102
)
@@ -111,7 +109,6 @@ def build_executor(model_path: str,
111109
backend_config=backend_config,
112110
dist_config=dist_config,
113111
misc_config=misc_config,
114-
tokenizer=tokenizer,
115112
adapters=adapters,
116113
device_type=device_type,
117114
)
@@ -124,7 +121,6 @@ def build_executor(model_path: str,
124121
backend_config=backend_config,
125122
dist_config=dist_config,
126123
misc_config=misc_config,
127-
tokenizer=tokenizer,
128124
adapters=adapters,
129125
device_type=device_type,
130126
dtype=dtype,

lmdeploy/pytorch/engine/executor/base.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ def __init__(self,
2323
backend_config: BackendConfig,
2424
dist_config: DistConfig,
2525
misc_config: MiscConfig,
26-
tokenizer: Any,
2726
adapters: Dict[str, str] = None,
2827
device_type: str = 'cuda'):
2928
"""Initialize Executor."""
@@ -37,7 +36,6 @@ def __init__(self,
3736
self.backend_config = backend_config
3837
self.dist_config = dist_config
3938
self.misc_config = misc_config,
40-
self.tokenizer = tokenizer
4139
self.dp = dist_config.dp
4240
self.tp = dist_config.tp
4341
self.world_size = dist_config.world_size

lmdeploy/pytorch/engine/executor/base_worker.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ def __init__(
2929
misc_config: MiscConfig,
3030
adapters: Dict[str, str] = None,
3131
device_type: str = 'cuda',
32-
tokenizer: Any = None,
3332
log_level: int = 30,
3433
):
3534
self.model_path = model_path
@@ -38,7 +37,6 @@ def __init__(
3837
self.backend_config = backend_config
3938
self.dist_config = dist_config
4039
self.misc_config = misc_config
41-
self.tokenizer = tokenizer
4240
self.adapters = adapters
4341
self.device_type = device_type
4442
self.log_level = log_level
@@ -96,7 +94,6 @@ def build_model(self):
9694
cache_config=self.cache_config,
9795
backend_config=self.backend_config,
9896
misc_config=self.misc_config,
99-
tokenizer=self.tokenizer,
10097
device_ctx=self.device_ctx,
10198
dist_ctx=self.dist_ctx,
10299
adapters=self.adapters)

lmdeploy/pytorch/engine/executor/mp_executor.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -224,15 +224,13 @@ def __init__(self,
224224
backend_config: BackendConfig,
225225
dist_config: DistConfig,
226226
misc_config: MiscConfig,
227-
tokenizer: Any,
228227
adapters: Dict[str, str] = None,
229228
device_type: str = 'cuda'):
230229
"""Initialize Executor."""
231230
super().__init__(model_path=model_path,
232231
model_config=model_config,
233232
cache_config=cache_config,
234233
backend_config=backend_config,
235-
tokenizer=tokenizer,
236234
dist_config=dist_config,
237235
misc_config=misc_config,
238236
adapters=adapters,
@@ -266,7 +264,6 @@ def __init__(self,
266264
backend_config=backend_config,
267265
dist_config=dist_config,
268266
misc_config=misc_config,
269-
tokenizer=tokenizer,
270267
adapters=adapters,
271268
device_type=device_type,
272269
log_level=logger.level)
@@ -430,7 +427,6 @@ def __init__(
430427
misc_config: MiscConfig,
431428
adapters: Dict[str, str] = None,
432429
device_type: str = 'cuda',
433-
tokenizer: Any = None,
434430
log_level: int = 30,
435431
):
436432
super().__init__(
@@ -442,7 +438,6 @@ def __init__(
442438
misc_config=misc_config,
443439
adapters=adapters,
444440
device_type=device_type,
445-
tokenizer=tokenizer,
446441
log_level=log_level,
447442
)
448443

@@ -491,7 +486,6 @@ def _main_loop(
491486
backend_config: BackendConfig,
492487
dist_config: DistConfig,
493488
misc_config: MiscConfig,
494-
tokenizer: Any,
495489
adapters: Dict[str, str] = None,
496490
device_type: str = 'cuda',
497491
log_level: int = 30,
@@ -515,7 +509,6 @@ def handle_sigterm(signum, frame):
515509
misc_config=misc_config,
516510
adapters=adapters,
517511
device_type=device_type,
518-
tokenizer=tokenizer,
519512
log_level=log_level)
520513
try_import_deeplink(device_type)
521514
worker.init_process_group(proc_id)

lmdeploy/pytorch/engine/executor/ray_executor.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,6 @@ def __init__(
160160
init_backend(device_type)
161161
try_import_deeplink(device_type)
162162

163-
from lmdeploy.tokenizer import Tokenizer
164-
tokenizer = Tokenizer(model_path).model.model
165163
model_config = ModelConfig.from_pretrained(model_path,
166164
dtype=dtype,
167165
hf_overrides=misc_config.hf_overrides,
@@ -176,7 +174,6 @@ def __init__(
176174
misc_config=misc_config,
177175
adapters=adapters,
178176
device_type=device_type,
179-
tokenizer=tokenizer,
180177
log_level=log_level,
181178
)
182179
self.node_ip = ray.util.get_node_ip_address()
@@ -232,7 +229,6 @@ def __init__(self,
232229
backend_config: BackendConfig,
233230
dist_config: DistConfig,
234231
misc_config: MiscConfig,
235-
tokenizer: Any,
236232
adapters: Dict[str, str] = None,
237233
device_type: str = 'cuda',
238234
dtype: str = 'auto'):
@@ -243,7 +239,6 @@ def __init__(self,
243239
backend_config=backend_config,
244240
dist_config=dist_config,
245241
misc_config=misc_config,
246-
tokenizer=tokenizer,
247242
adapters=adapters,
248243
device_type=device_type)
249244

lmdeploy/pytorch/engine/executor/uni_executor.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Copyright (c) OpenMMLab. All rights reserved.
22
import asyncio
3-
from typing import Any, Dict, List
3+
from typing import Dict, List
44

55
from lmdeploy.pytorch.config import BackendConfig, CacheConfig, DistConfig, MiscConfig, ModelConfig
66
from lmdeploy.pytorch.devices import DeviceContext
@@ -23,7 +23,6 @@ def __init__(self,
2323
cache_config: CacheConfig,
2424
backend_config: BackendConfig,
2525
misc_config: MiscConfig,
26-
tokenizer: Any,
2726
adapters: Dict[str, str] = None,
2827
device_type: str = 'cuda'):
2928
"""Initialize Executor."""
@@ -33,7 +32,6 @@ def __init__(self,
3332
backend_config=backend_config,
3433
dist_config=DistConfig(),
3534
misc_config=misc_config,
36-
tokenizer=tokenizer,
3735
adapters=adapters,
3836
device_type=device_type)
3937

@@ -43,7 +41,6 @@ def __init__(self,
4341
cache_config=cache_config,
4442
backend_config=backend_config,
4543
misc_config=misc_config,
46-
tokenizer=tokenizer,
4744
device_ctx=self.device_ctx,
4845
adapters=adapters)
4946

lmdeploy/pytorch/engine/model_agent.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from lmdeploy.pytorch.disagg.config import EngineRole
1818
from lmdeploy.serve.openai.protocol import UpdateParamsRequest
19+
from lmdeploy.tokenizer import Tokenizer
1920
from lmdeploy.utils import get_logger
2021

2122
from ..backends import get_backend
@@ -307,16 +308,16 @@ def __init__(self,
307308
cache_config: CacheConfig,
308309
backend_config: BackendConfig,
309310
misc_config: MiscConfig,
310-
tokenizer: Any,
311311
dist_ctx: DistContext,
312312
device_ctx: DeviceContext,
313313
adapters: Dict[str, str] = None):
314314

315315
self.model_config = model_config
316316
self.cache_config = cache_config
317-
self.tokenizer = tokenizer
317+
# use raw tokenizer
318+
self.tokenizer = Tokenizer(model_path).model.model
318319
try:
319-
self.sampling_vocab_size = len(tokenizer)
320+
self.sampling_vocab_size = len(self.tokenizer)
320321
except BaseException:
321322
self.sampling_vocab_size = None
322323

@@ -1158,7 +1159,6 @@ def build_model_agent(model_path: str,
11581159
cache_config: CacheConfig,
11591160
backend_config: BackendConfig,
11601161
misc_config: MiscConfig,
1161-
tokenizer: Any,
11621162
dist_ctx: DistContext = None,
11631163
device_ctx: DeviceContext = None,
11641164
adapters: Dict[str, str] = None):
@@ -1187,7 +1187,6 @@ def build_model_agent(model_path: str,
11871187
cache_config=cache_config,
11881188
backend_config=backend_config,
11891189
misc_config=misc_config,
1190-
tokenizer=tokenizer,
11911190
adapters=adapters,
11921191
dist_ctx=dist_ctx,
11931192
device_ctx=device_ctx,

0 commit comments

Comments
 (0)