@@ -102,19 +102,12 @@ class TestLLMServer:
     def test_basic_generation(
         self,
         server: tuple[Any, int, ServerConfig],
-        request: pytest.FixtureRequest,
     ) -> None:
         """Tests basic text generation capabilities.

         Args:
             server: Tuple of (process, port) from server fixture
         """
-        test_id = request.node.callspec.id
-        if "trie" in test_id:
-            pytest.skip(
-                reason="TrieAttentionCache APIs are under development, skip it for now."
-            )
-
         process, port, config = server
         assert process.poll() is None, "Server process terminated unexpectedly"
         dataset = (
@@ -145,19 +138,12 @@ def test_basic_generation(
     def test_multi_page_generation(
         self,
         server: tuple[Any, int, ServerConfig],
-        request: pytest.FixtureRequest,
     ) -> None:
         """Tests multi-page text generation capabilities.

         Args:
             server: Tuple of (process, port) from server fixture
         """
-        test_id = request.node.callspec.id
-        if "trie" in test_id:
-            pytest.skip(
-                reason="TrieAttentionCache APIs are under development, skip it for now."
-            )
-
         process, port, config = server
         assert process.poll() is None, "Server process terminated unexpectedly"
         dataset = (
Expand Down Expand Up @@ -209,10 +195,6 @@ def test_concurrent_generation(
pytest.skip(
reason="Known issue with chunked prefill in batch case: https://github.com/nod-ai/shark-ai/issues/2235"
)
if "trie" in test_id:
pytest.skip(
reason="TrieAttentionCache APIs are under development, skip it for now."
)

process, port, config = server
assert process.poll() is None, "Server process terminated unexpectedly"
Expand Down Expand Up @@ -257,18 +239,12 @@ def _generate_task(prompt: str, port: int):
def test_single_greedy_switch(
self,
server: tuple[Any, int, ServerConfig],
request: pytest.FixtureRequest,
):
"""Tests switching to single-beam greedy generation.

Args:
server: Tuple of (process, port, config) from server fixture
"""
test_id = request.node.callspec.id
if "trie" in test_id:
pytest.skip(
reason="TrieAttentionCache APIs are under development, skip it for now."
)
process, port, _ = server
assert process.poll() is None, "Server process terminated unexpectedly"

Expand Down Expand Up @@ -316,10 +292,6 @@ def test_beam_search_switch(
pytest.skip(
"Beam search with 2 beams isn't compatible with logits returned by GPU argmax model."
)
if "trie" in test_id:
pytest.skip(
reason="TrieAttentionCache APIs are under development, skip it for now."
)

process, port, _ = server
assert process.poll() is None, "Server process terminated unexpectedly"
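For reference, a minimal, self-contained sketch of the skip pattern these hunks delete: a parametrized test reads the parametrization id from pytest's built-in `request` fixture (`request.node.callspec.id`) and calls `pytest.skip()` when the id matches. The `cache_variant` fixture and its params below are hypothetical stand-ins for the real parametrized `server` fixture used by `TestLLMServer`; `pytest.skip(reason=...)` assumes pytest >= 7.0.

```python
# Illustrative sketch only: "cache_variant" and its params stand in for the
# real parametrized server fixture used by TestLLMServer.
import pytest


@pytest.fixture(params=["base", "trie"])
def cache_variant(request: pytest.FixtureRequest) -> str:
    """Parametrized fixture; each param becomes part of the test's callspec id."""
    return request.param


def test_generation(cache_variant: str, request: pytest.FixtureRequest) -> None:
    # The callspec id contains the fixture param (e.g. "base" or "trie"),
    # so a test can opt out of one parametrization at runtime.
    test_id = request.node.callspec.id
    if "trie" in test_id:
        pytest.skip(reason="TrieAttentionCache APIs are under development, skip it for now.")
    assert cache_variant == "base"
```

When an entire parametrization should be excluded rather than skipped per test, an alternative is to attach the mark to the param itself, e.g. `pytest.param("trie", marks=pytest.mark.skip(reason=...))` in the fixture's `params` list.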