Skip to content

Commit 8e0d680

Browse files
authored
Support InternVL3.5-Flash (#3952)
* support internvl flash
* clean
* fix
* fix context update for multi requests
* dropout to identity, remove clone, fix type
* fix acc, explicit dtype, optimize
* get seqlen from context, pass context in post update
* remove self.model_metas
1 parent d91512a commit 8e0d680

File tree

3 files changed

+383
-29
lines changed

3 files changed

+383
-29
lines changed

lmdeploy/pytorch/engine/model_agent.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,12 @@ def model_forward(
239239
context=context,
240240
)
241241
output = model(**input_dict)
242-
return dict(hidden_states=output, model_metas=model_metas)
242+
243+
# InternVL-3.5-Flash will change the seqlen, model_metas during forward
244+
model_metas = context.model_metas
245+
seq_length = context.q_seqlens
246+
247+
return dict(hidden_states=output, model_metas=model_metas, seq_length=seq_length)
243248

244249

245250
@record_function('stopping_criteria')
@@ -503,7 +508,11 @@ async def __long_context_single_forward(new_inputs, max_seqlen: int):
503508
if not is_long_context:
504509
ret = await __forward(inputs)
505510
if not return_logits and not inputs.is_decoding:
506-
last_token_loc = inputs.seq_length.cumsum(0) - 1
511+
# fetch seq_length from the context, since models may change it (e.g. InternVL-3.5-Flash)
512+
seq_length = ret.get('seq_length', None)
513+
assert seq_length is not None, 'seq_length cannot be None.'
514+
last_token_loc = seq_length.cumsum(0) - 1
515+
507516
ret['hidden_states'] = ret['hidden_states'][:, last_token_loc]
508517
else:
509518
ret = await __long_context_single_forward(inputs, max_seqlen)

0 commit comments

Comments (0)