@@ -59,12 +59,15 @@ def _random_prompt(min_words: int = 1024, max_words: int = 1024 * 2) -> str:
5959 # Pick a random template
6060 base_prompt = random .choice (prompt_templates )
6161
62- # Add some padding to vary the length if needed
63- if min_words > 50 :
62+ if max_words < min_words :
63+ max_words = min_words
64+ target_words = random .randint (min_words , max_words )
65+
66+ if target_words > 50 :
6467 # For longer prompts, repeat context
6568 padding_text = (
6669 " This is an interesting topic that deserves more explanation. "
67- * (min_words // 50 )
70+ * (target_words // 50 )
6871 )
6972 base_prompt = base_prompt + padding_text
7073
@@ -516,8 +519,20 @@ def test_logprobs_WITHOUT_batch_invariance_should_FAIL(backend):
516519 dtype = "bfloat16" ,
517520 )
518521
519- # Use more realistic prompts for better token generation
520- prompts = [_random_prompt (10 , 50 ) for i in range (32 )]
522+ # build ragged prompts to change shapes significantly across BS=1 vs BS=N
523+ long_min = int (os .getenv ("VLLM_MIN_PROMPT" , "768" ))
524+ long_max = int (os .getenv ("VLLM_MAX_PROMPT" , "2048" ))
525+ prompts : list [str ] = []
526+ options = [
527+ (max (long_min , 1536 ), max (long_max , 3072 )), # very long
528+ (max (1024 , long_min ), max (2048 , long_max )), # long
529+ (256 , 512 ), # mid
530+ (10 , 20 ), # short
531+ ]
532+
533+ for _ in range (32 ):
534+ lo , hi = random .choice (options )
535+ prompts .append (_random_prompt (lo , hi ))
521536
522537 sp = SamplingParams (
523538 temperature = 0.6 ,