Skip to content

Commit 05b044e

Browse files
punitvara and hmellor
authored
[Doc] Fix cross-reference warnings (vllm-project#25058)
Signed-off-by: Punit Vara <[email protected]> Signed-off-by: Harry Mellor <[email protected]> Co-authored-by: Harry Mellor <[email protected]>
1 parent aa3f105 commit 05b044e

File tree

6 files changed

+18
-14
lines changed

6 files changed

+18
-14
lines changed

vllm/benchmarks/datasets.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,8 @@ def get_random_lora_request(
171171
If `None`, LoRA is not used.
172172
173173
Returns:
174-
A new [LoRARequest][] (or `None` if not applicable).
174+
A new [`LoRARequest`][vllm.lora.request.LoRARequest]
175+
(or `None` if not applicable).
175176
"""
176177
if max_loras is None or lora_path is None:
177178
return None

vllm/distributed/device_communicators/shm_object_storage.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ class SingleWriterShmRingBuffer:
3030
- Maintains metadata for each allocated buffer chunk in the writer process
3131
- Supports custom "is_free_fn" functions to determine when buffers can be
3232
reused
33-
- Each buffer chunk contains: [4-byte id][4-byte size][actual_data]
33+
- Each buffer chunk contains: `[4-byte id][4-byte size][actual_data]`
3434
3535
Key Concepts:
3636
- monotonic_id_start/end: Track the range of active buffer IDs
@@ -99,7 +99,7 @@ class SingleWriterShmRingBuffer:
9999
- Writer handles garbage collection (free_buf) based on reader feedback
100100
101101
Memory Layout per Buffer Chunk:
102-
[4-byte monotonic_id][4-byte chunk_size][actual_data...]
102+
`[4-byte monotonic_id][4-byte chunk_size][actual_data...]`
103103
^metadata_start ^data_start
104104
105105
The monotonic_id ensures data integrity - readers can verify they're
@@ -185,7 +185,7 @@ def allocate_buf(self, size: int) -> tuple[int, int]:
185185
'''
186186
Allocate a buffer `MD_SIZE` + `size` bytes in the shared memory.
187187
Memory layout:
188-
[4-byte monotonic_id][4-byte size][buffer data...]
188+
`[4-byte monotonic_id][4-byte size][buffer data...]`
189189
'''
190190
assert self.is_writer, "Only the writer can allocate buffers."
191191
assert size > 0, "Size must be greater than 0"
@@ -413,7 +413,7 @@ class SingleWriterShmObjectStorage:
413413
allocation
414414
415415
Memory Layout per Object:
416-
[4-byte reference_count][metadata_size][serialized_object_data]
416+
`[4-byte reference_count][metadata_size][serialized_object_data]`
417417
418418
Thread Safety:
419419
- Writer operations (put, clear) are single-threaded by design

vllm/model_executor/layers/mamba/ops/causal_conv1d.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -927,11 +927,13 @@ def causal_conv1d_update(
927927
validate_data=False,
928928
):
929929
"""
930-
x: (batch, dim) or (batch, dim, seqlen) or (num_tokens, dim)
931-
[shape=2: single token prediction]
932-
[shape=3: single or multiple tokens prediction]
933-
[shape=2 with num_tokens: continuous batching, where num_tokens is the
934-
total tokens of all sequences in that batch]
930+
x: Input tensor which can take the following shapes:
931+
932+
- `[batch, dim]` - single token prediction
933+
- `[batch, dim, seqlen]` - single or multiple tokens prediction
934+
- `[num_tokens, dim]` - continuous batching, where num_tokens is
935+
the total tokens of all sequences in that batch
936+
935937
conv_state: (..., dim, state_len), where state_len >= width - 1
936938
weight: (dim, width)
937939
bias: (dim,)

vllm/model_executor/models/mistral3.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -583,7 +583,7 @@ def forward(
583583
inputs_embeds: Optional tensor of input embeddings.
584584
585585
Info:
586-
[Mistral3ImagePixelInputs][]
586+
[`Mistral3ImagePixelInputs`][vllm.model_executor.models.mistral3.Mistral3ImagePixelInputs]
587587
"""
588588
if intermediate_tensors is not None:
589589
inputs_embeds = None

vllm/multimodal/profiling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ def get_mm_max_contiguous_tokens(
301301
Returns the maximum length of the multimodal (image placeholders+text)
302302
tokens, including any break/text tokens in-between image embeddings.
303303
304-
<im_start> [IMG] [IMG] [IMG] <row_break> [IMG] [IMG] [IMG] <im_end>
304+
`<im_start> [IMG] [IMG] [IMG] <row_break> [IMG] [IMG] [IMG] <im_end>`
305305
Returns 9, even when the number of image embeddings is 6.
306306
307307
This is important to take into account when profiling and

vllm/v1/core/kv_cache_manager.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@ class KVCacheBlocks:
2424
"""
2525
blocks: tuple[list[KVCacheBlock], ...]
2626
"""
27-
blocks[i][j] refers to the i-th kv_cache_group and the j-th block of tokens.
28-
We don't use block of tokens as the outer dimension because it assumes all
27+
`blocks[i][j]` refers to the i-th kv_cache_group
28
and the j-th block of tokens. We don't use block of
29
tokens as the outer dimension because it assumes all
2930
kv_cache_groups have the same number of blocks, which is true for now but
3031
will be broken if we want to give different block_size to different
3132
kv_cache_groups in the future.

0 commit comments

Comments (0)