44 changes: 0 additions & 44 deletions vllm/model_executor/models/roberta.py
@@ -124,19 +124,6 @@ def forward_hpu(
pos_list.append(position_ids[offset])
token_list.append(input_ids[offset])

for index, (positions, tokens, seq_len) in enumerate(
        zip(pos_list, token_list, seq_lens)):
    # Verify assumption that incoming position are
    # always a sequence from 0 to N.
    expected_pos = torch.arange(positions.size()[0],
                                dtype=torch.long,
                                device=inputs_embeds.device)
    valid_input_mask = expected_pos < seq_len
    expected_pos = expected_pos * valid_input_mask
    assert torch.equal(positions, expected_pos)
    position_ids[index] = create_position_ids_from_input_ids_hpu(
        tokens, self.padding_idx, seq_len)

# Position embeddings.
position_embeddings = self.position_embeddings(position_ids)
if token_type_ids is None:
@@ -207,37 +194,6 @@ def forward_cuda(
return self.forward_native(input_ids, seq_lens, position_ids,
                           token_type_ids)


# Adapted from transformers
def create_position_ids_from_input_ids_hpu(input_ids,
                                           padding_idx,
                                           seq_len,
                                           past_key_values_length=0):
    """
    Replace non-padding symbols with their position numbers.
    Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: torch.Tensor x:

    Returns: torch.Tensor
    """
    # The series of casts and type-conversions here are carefully
    # balanced to both work with ONNX export and XLA.
    valid_input_mask = torch.arange(input_ids.size()[0],
                                    dtype=torch.int,
                                    device=input_ids.device)
    valid_input_mask = valid_input_mask < seq_len

    mask = input_ids.ne(padding_idx).int()

    incremental_indices = (torch.cumsum(mask, dim=0).type_as(mask) +
                           past_key_values_length) * mask

    return (incremental_indices.long() + padding_idx) * valid_input_mask


# Adapted from transformers
class RobertaClassificationHead(nn.Module):
"""Head for sentence-level classification tasks."""
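For context, a minimal standalone sketch of the scheme the deleted helper implemented: each non-padding token gets its position from a cumulative count over the padding mask, offset by `padding_idx`, and slots at or beyond `seq_len` are zeroed. The function name and example token ids below are illustrative, not part of this diff, and `past_key_values_length` is omitted.

```python
import torch


def roberta_position_ids_sketch(input_ids: torch.Tensor, padding_idx: int,
                                seq_len: int) -> torch.Tensor:
    # 1 for real tokens, 0 for padding tokens.
    mask = input_ids.ne(padding_idx).int()
    # Cumulative count of non-padding tokens; padding slots stay 0.
    incremental_indices = torch.cumsum(mask, dim=0).type_as(mask) * mask
    # RoBERTa-style positions start at padding_idx + 1.
    positions = incremental_indices.long() + padding_idx
    # Zero out everything at or beyond the true sequence length.
    valid = torch.arange(input_ids.numel(), device=input_ids.device) < seq_len
    return positions * valid


# Example: a 4-token prompt followed by two pad tokens (padding_idx == 1)
# yields positions [2, 3, 4, 5, 0, 0].
print(roberta_position_ids_sketch(
    torch.tensor([0, 31414, 232, 2, 1, 1]), padding_idx=1, seq_len=4))
```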
17 changes: 16 additions & 1 deletion vllm/worker/hpu_model_runner.py
@@ -1579,7 +1579,22 @@ def _prepare_prompt(
input_tokens.append(prompt_tokens)
# NOTE(woosuk): Here we assume that the first token in the prompt
# is always the first token in the sequence.
input_positions.append(list(range(context_len, seq_len)))
if "RobertaEmbeddingModel" in str(type(self.model.model)):
padding_idx = getattr(self.model.model.model.embeddings,
"padding_idx", 1)
tokens_cpu = torch.tensor(prompt_tokens,
dtype=torch.long,
device="cpu").clone().contiguous()
mask = tokens_cpu.ne(padding_idx).to(torch.int32)
incremental_indices = (
torch.cumsum(mask, dim=0).to(torch.int32) * mask)
pos_cpu = incremental_indices.to(torch.int64) + padding_idx
if seq_len < pos_cpu.numel():
pos_cpu[seq_len:] = 0
pos_hpu = pos_cpu.to("hpu", non_blocking=False)
input_positions.append(pos_hpu.tolist())
else:
    input_positions.append(list(range(context_len, seq_len)))

seq_data_mrope_positions: Optional[List[List[int]]] = None

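And a hedged sketch of the replacement path: `_prepare_prompt` now builds the same positions on the CPU for `RobertaEmbeddingModel` before the transfer, so that bucket padding past `seq_len` maps to position 0 rather than out-of-range values. The helper below is illustrative, not the actual runner code, and the final `.to("hpu")` transfer is left out since it needs Gaudi hardware.

```python
import torch


def prepare_roberta_positions(prompt_tokens, seq_len, padding_idx=1):
    # Mirror of the host-side computation added to _prepare_prompt.
    tokens_cpu = torch.tensor(prompt_tokens, dtype=torch.long, device="cpu")
    mask = tokens_cpu.ne(padding_idx).to(torch.int32)
    incremental_indices = torch.cumsum(mask, dim=0).to(torch.int32) * mask
    pos_cpu = incremental_indices.to(torch.int64) + padding_idx
    # Bucketed prompts are padded beyond seq_len; zero those positions.
    if seq_len < pos_cpu.numel():
        pos_cpu[seq_len:] = 0
    return pos_cpu.tolist()


# A 4-token prompt padded to a bucket of 8 (padding_idx == 1):
# real tokens get positions 2..5, the padded tail gets 0.
print(prepare_roberta_positions([0, 31414, 232, 2, 1, 1, 1, 1], seq_len=4))
# -> [2, 3, 4, 5, 0, 0, 0, 0]
```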