diff --git a/.gitignore b/.gitignore index 4458cf81..5d50df6a 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ runpod.toml .env test/* vllm-base/vllm-* -.DS_Store \ No newline at end of file +.DS_Store +.ipynb_checkpoints \ No newline at end of file diff --git a/modal-deploy/Sunflower32b-Ultravox/latency_test/context_eng_1.txt b/modal-deploy/Sunflower32b-Ultravox/latency_test/context_eng_1.txt new file mode 100644 index 00000000..844f5c86 --- /dev/null +++ b/modal-deploy/Sunflower32b-Ultravox/latency_test/context_eng_1.txt @@ -0,0 +1,3142 @@ +Dec 17 16:54:26.871 +(APIServer pid=5) INFO 12-17 13:54:26 [logger.py:47] Received request chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. +Dec 17 16:54:34.895 +(APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:54:34 [async_llm.py:344] Added request chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f. 
+Dec 17 16:54:35.076 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:54:35.132 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:54:35.180 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:54:35.227 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:54:35.274 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:54:35.321 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:54:35.368 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' Where', output_token_ids: [10967], finish_reason: None +Dec 17 16:54:35.415 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:54:35.463 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:54:35.510 +(APIServer pid=5) 
INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:54:35.557 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:54:35.604 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:54:35.651 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ',', output_token_ids: [11], finish_reason: None +Dec 17 16:54:35.698 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:54:35.745 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:54:35.792 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:54:35.839 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:54:35.886 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:54:35.933 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated 
response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:54:35.981 +(APIServer pid=5) INFO 12-17 13:54:35 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:54:36.027 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:54:36.075 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:54:36.122 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:54:36.168 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:54:36.216 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None (APIServer pid=5) INFO 12-17 13:54:36 [loggers.py:236] Engine 000: Avg prompt throughput: 17.5 tokens/s, Avg generation throughput: 2.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.7%, Prefix cache hit rate: 0.0%, MM cache hit rate: 0.0% +Dec 17 16:54:36.263 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:54:36.311 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] 
Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:54:36.358 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:54:36.404 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:54:36.452 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:54:36.499 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:54:36.547 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:54:36.593 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:54:36.641 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:54:36.689 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:54:36.737 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response 
chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:54:36.785 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:54:36.833 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:54:36.881 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:54:36.928 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:54:36.977 +(APIServer pid=5) INFO 12-17 13:54:36 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:54:37.024 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:54:37.071 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:54:37.119 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:54:37.167 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f 
(streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:54:37.216 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:54:37.265 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:54:37.319 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:54:37.366 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:54:37.419 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:54:37.465 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:54:37.514 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:54:37.561 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:54:37.609 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' how', 
output_token_ids: [1246], finish_reason: None +Dec 17 16:54:37.657 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:54:37.708 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:54:37.756 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:54:37.807 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:54:37.852 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:54:37.901 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:54:37.947 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:54:37.995 +(APIServer pid=5) INFO 12-17 13:54:37 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:54:38.042 +(APIServer pid=5) INFO 12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 
17 16:54:38.091 +(APIServer pid=5) INFO 12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:54:38.138 +(APIServer pid=5) INFO 12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:54:38.187 +(APIServer pid=5) INFO 12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:54:38.234 +(APIServer pid=5) INFO 12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:54:38.281 +(APIServer pid=5) INFO 12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:54:38.329 +(APIServer pid=5) INFO 12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:54:38.376 +(APIServer pid=5) INFO 12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:54:38.424 +(APIServer pid=5) INFO 12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:54:38.471 +(APIServer pid=5) INFO 12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:54:38.518 +(APIServer pid=5) INFO 
12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:54:38.566 +(APIServer pid=5) INFO 12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:54:38.613 +(APIServer pid=5) INFO 12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:54:38.660 +(APIServer pid=5) INFO 12-17 13:54:38 [logger.py:76] Generated response chatcmpl-5355e7d0c5bf4559b4b4099c14661f4f (streaming complete): output: "You know that feeling right? Where you talk to an AI, you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. 
We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:54:38.745 +POST /v1/chat/completions -> 200 OK (duration: 86.7 s, execution: 86.6 s) +Dec 17 16:54:46.219 +(APIServer pid=5) INFO 12-17 13:54:46 [loggers.py:236] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 5.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 0.0%, MM cache hit rate: 0.0% +Dec 17 16:54:56.220 +(APIServer pid=5) INFO 12-17 13:54:56 [loggers.py:236] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 0.0%, MM cache hit rate: 0.0% +Dec 17 16:55:31.381 +(APIServer pid=5) INFO 12-17 13:55:31 [logger.py:47] Received request chatcmpl-f51a46a76850442eafd50a562310cea4: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:55:31 [async_llm.py:344] Added request chatcmpl-f51a46a76850442eafd50a562310cea4. 
+Dec 17 16:55:31.434 +(APIServer pid=5) INFO 12-17 13:55:31 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:55:31.482 +(APIServer pid=5) INFO 12-17 13:55:31 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:55:31.529 +(APIServer pid=5) INFO 12-17 13:55:31 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:55:31.576 +(APIServer pid=5) INFO 12-17 13:55:31 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:55:31.624 +(APIServer pid=5) INFO 12-17 13:55:31 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:55:31.671 +(APIServer pid=5) INFO 12-17 13:55:31 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' where', output_token_ids: [1380], finish_reason: None +Dec 17 16:55:31.719 +(APIServer pid=5) INFO 12-17 13:55:31 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:55:31.765 +(APIServer pid=5) INFO 12-17 13:55:31 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:55:31.812 +(APIServer pid=5) INFO 12-17 13:55:31 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:55:31.860 +(APIServer 
pid=5) INFO 12-17 13:55:31 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:55:31.907 +(APIServer pid=5) INFO 12-17 13:55:31 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:55:31.955 +(APIServer pid=5) INFO 12-17 13:55:31 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:55:32.002 +(APIServer pid=5) INFO 12-17 13:55:31 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:55:32.049 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:55:32.096 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:55:32.143 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:55:32.190 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:55:32.238 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:55:32.285 +(APIServer pid=5) INFO 12-17 13:55:32 
[logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:55:32.333 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:55:32.380 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:55:32.427 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:55:32.475 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:55:32.521 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:55:32.569 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:55:32.616 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:55:32.663 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:55:32.711 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response 
chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:55:32.758 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:55:32.805 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:55:32.853 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:55:32.899 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:55:32.947 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:55:32.994 +(APIServer pid=5) INFO 12-17 13:55:32 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:55:33.041 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:55:33.089 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:55:33.136 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 
(streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:55:33.182 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:55:33.230 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:55:33.277 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:55:33.324 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:55:33.371 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:55:33.418 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:55:33.465 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:55:33.513 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:55:33.560 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' today', 
output_token_ids: [3351], finish_reason: None +Dec 17 16:55:33.607 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:55:33.654 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:55:33.702 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:55:33.749 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:55:33.795 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:55:33.843 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:55:33.890 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:55:33.937 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:55:33.985 +(APIServer pid=5) INFO 12-17 13:55:33 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None 
+Dec 17 16:55:34.031 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:55:34.078 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:55:34.126 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:55:34.173 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:55:34.221 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:55:34.267 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:55:34.314 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:55:34.362 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:55:34.408 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:55:34.455 +(APIServer pid=5) INFO 12-17 
13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:55:34.503 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:55:34.550 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:55:34.597 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:55:34.644 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:55:34.691 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:55:34.739 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:55:34.786 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:55:34.833 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:55:34.880 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated 
response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:55:34.927 +(APIServer pid=5) INFO 12-17 13:55:34 [logger.py:76] Generated response chatcmpl-f51a46a76850442eafd50a562310cea4 (streaming complete): output: "You know that feeling right where you're talking to an AI you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:55:35.007 +POST /v1/chat/completions -> 200 OK (duration: 55.2 s, execution: 55.1 s) +Dec 17 16:55:36.223 +(APIServer pid=5) INFO 12-17 13:55:36 [loggers.py:236] Engine 000: Avg prompt throughput: 17.5 tokens/s, Avg generation throughput: 7.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 45.7%, MM cache hit rate: 50.0% +Dec 17 16:55:39.345 +(APIServer pid=5) INFO 12-17 13:55:39 [logger.py:47] Received request chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:55:39 [async_llm.py:344] Added request chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7. 
+Dec 17 16:55:39.399 +(APIServer pid=5) INFO 12-17 13:55:39 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:55:39.447 +(APIServer pid=5) INFO 12-17 13:55:39 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:55:39.494 +(APIServer pid=5) INFO 12-17 13:55:39 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:55:39.541 +(APIServer pid=5) INFO 12-17 13:55:39 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:55:39.589 +(APIServer pid=5) INFO 12-17 13:55:39 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:55:39.635 +(APIServer pid=5) INFO 12-17 13:55:39 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' where', output_token_ids: [1380], finish_reason: None +Dec 17 16:55:39.682 +(APIServer pid=5) INFO 12-17 13:55:39 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:55:39.730 +(APIServer pid=5) INFO 12-17 13:55:39 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:55:39.777 +(APIServer pid=5) INFO 12-17 13:55:39 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:55:39.824 +(APIServer 
pid=5) INFO 12-17 13:55:39 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:55:39.871 +(APIServer pid=5) INFO 12-17 13:55:39 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:55:39.919 +(APIServer pid=5) INFO 12-17 13:55:39 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:55:39.965 +(APIServer pid=5) INFO 12-17 13:55:39 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ',', output_token_ids: [11], finish_reason: None +Dec 17 16:55:40.012 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:55:40.060 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:55:40.107 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:55:40.154 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:55:40.201 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:55:40.248 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] 
Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:55:40.296 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:55:40.342 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:55:40.389 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:55:40.437 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:55:40.484 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:55:40.531 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:55:40.579 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:55:40.625 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:55:40.672 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response 
chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:55:40.720 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:55:40.767 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:55:40.814 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:55:40.861 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:55:40.908 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:55:40.956 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:55:41.002 +(APIServer pid=5) INFO 12-17 13:55:40 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:55:41.049 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:55:41.097 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 
(streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:55:41.143 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:55:41.191 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:55:41.238 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:55:41.285 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:55:41.333 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:55:41.380 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:55:41.427 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:55:41.474 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:55:41.521 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' So', 
output_token_ids: [2055], finish_reason: None +Dec 17 16:55:41.569 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:55:41.615 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:55:41.663 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:55:41.710 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:55:41.757 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:55:41.804 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:55:41.851 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:55:41.898 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:55:41.945 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None 
+Dec 17 16:55:41.992 +(APIServer pid=5) INFO 12-17 13:55:41 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:55:42.040 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:55:42.087 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:55:42.134 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:55:42.181 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:55:42.228 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:55:42.276 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:55:42.322 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:55:42.370 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:55:42.417 +(APIServer pid=5) INFO 
12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:55:42.464 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:55:42.511 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:55:42.558 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:55:42.606 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:55:42.653 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:55:42.699 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:55:42.747 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:55:42.794 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:55:42.841 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated 
response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:55:42.888 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:55:42.935 +(APIServer pid=5) INFO 12-17 13:55:42 [logger.py:76] Generated response chatcmpl-c0ff7f01f42e4f7d8e653daa2af8aff7 (streaming complete): output: "You know that feeling right where you're talking to an AI, you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:55:43.016 +POST /v1/chat/completions -> 200 OK (duration: 7.18 s, execution: 7.12 s) +Dec 17 16:55:46.224 +(APIServer pid=5) INFO 12-17 13:55:46 [loggers.py:236] Engine 000: Avg prompt throughput: 17.5 tokens/s, Avg generation throughput: 7.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 61.0%, MM cache hit rate: 66.7% +Dec 17 16:55:56.225 +(APIServer pid=5) INFO 12-17 13:55:56 [loggers.py:236] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 61.0%, MM cache hit rate: 66.7% +Dec 17 16:56:13.369 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:47] Received request chatcmpl-e5d10ce4837445118e52f36d9bbec655: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, 
logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:56:13 [async_llm.py:344] Added request chatcmpl-e5d10ce4837445118e52f36d9bbec655. +Dec 17 16:56:13.423 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:56:13.470 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:56:13.517 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:56:13.564 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:56:13.612 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:56:13.659 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:56:13.705 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' Where', output_token_ids: [10967], finish_reason: None +Dec 17 16:56:13.753 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:76] 
Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:56:13.800 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:56:13.846 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:56:13.894 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:13.941 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:56:13.989 +(APIServer pid=5) INFO 12-17 13:56:13 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:56:14.036 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ',', output_token_ids: [11], finish_reason: None +Dec 17 16:56:14.083 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:56:14.130 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:56:14.177 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response 
chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:56:14.224 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:56:14.272 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:56:14.319 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:56:14.366 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:56:14.414 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:56:14.460 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:56:14.508 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:56:14.555 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:56:14.602 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response 
chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:56:14.649 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:56:14.696 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:56:14.743 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:56:14.791 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:56:14.838 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:56:14.886 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:56:14.932 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:56:14.979 +(APIServer pid=5) INFO 12-17 13:56:14 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:56:15.026 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 
(streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:56:15.074 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:56:15.121 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:56:15.167 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:56:15.215 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:56:15.262 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:56:15.310 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:15.357 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:56:15.404 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:56:15.451 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' am', 
output_token_ids: [1079], finish_reason: None +Dec 17 16:56:15.498 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:56:15.545 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:56:15.593 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:56:15.639 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:56:15.686 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:56:15.734 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:56:15.781 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' gonna', output_token_ids: [16519], finish_reason: None +Dec 17 16:56:15.827 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:56:15.875 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None 
+Dec 17 16:56:15.922 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:56:15.970 +(APIServer pid=5) INFO 12-17 13:56:15 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:56:16.016 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:56:16.063 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:56:16.111 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:56:16.158 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:56:16.205 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:56:16.227 +(APIServer pid=5) INFO 12-17 13:56:16 [loggers.py:236] Engine 000: Avg prompt throughput: 17.5 tokens/s, Avg generation throughput: 6.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, Prefix cache hit rate: 68.6%, MM cache hit rate: 75.0% +Dec 17 16:56:16.253 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' going', output_token_ids: [2087], 
finish_reason: None +Dec 17 16:56:16.299 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:16.347 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:56:16.394 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:56:16.441 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:56:16.488 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:16.535 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:56:16.582 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:56:16.630 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:56:16.677 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:56:16.724 
+(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:56:16.771 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:56:16.818 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:56:16.865 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:56:16.913 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:56:16.960 +(APIServer pid=5) INFO 12-17 13:56:16 [logger.py:76] Generated response chatcmpl-e5d10ce4837445118e52f36d9bbec655 (streaming complete): output: "You know that feeling right? Where you're talking to an AI, you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're gonna dive into how we fixed that. 
We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:56:17.096 +POST /v1/chat/completions -> 200 OK (duration: 33.2 s, execution: 33.1 s) +Dec 17 16:56:20.816 +(APIServer pid=5) INFO 12-17 13:56:20 [logger.py:47] Received request chatcmpl-3006c14a59b54218b62895e1f9fbf197: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:56:20 [async_llm.py:344] Added request chatcmpl-3006c14a59b54218b62895e1f9fbf197. 
+Dec 17 16:56:20.870 +(APIServer pid=5) INFO 12-17 13:56:20 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:56:20.917 +(APIServer pid=5) INFO 12-17 13:56:20 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:56:20.965 +(APIServer pid=5) INFO 12-17 13:56:20 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:56:21.011 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:56:21.058 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:56:21.106 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:56:21.153 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' Where', output_token_ids: [10967], finish_reason: None +Dec 17 16:56:21.200 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:56:21.247 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:56:21.294 +(APIServer pid=5) 
INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:21.342 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:56:21.389 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:56:21.435 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ',', output_token_ids: [11], finish_reason: None +Dec 17 16:56:21.483 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:56:21.530 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:56:21.577 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:56:21.624 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:56:21.671 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:56:21.719 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated 
response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:56:21.766 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:56:21.812 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:56:21.860 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:56:21.907 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:56:21.954 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:56:22.002 +(APIServer pid=5) INFO 12-17 13:56:21 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:56:22.048 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:56:22.096 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:56:22.143 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response 
chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:56:22.190 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:56:22.237 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:56:22.284 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:56:22.331 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:56:22.379 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:56:22.426 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:56:22.472 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:56:22.520 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:56:22.567 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 
(streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:56:22.615 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:56:22.662 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:56:22.709 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:22.757 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:56:22.804 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:56:22.851 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:56:22.898 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:56:22.945 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:56:22.993 +(APIServer pid=5) INFO 12-17 13:56:22 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' So', 
output_token_ids: [2055], finish_reason: None +Dec 17 16:56:23.039 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:56:23.086 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:56:23.134 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:56:23.181 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:56:23.228 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:23.276 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:56:23.322 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:56:23.369 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:56:23.417 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None 
+Dec 17 16:56:23.464 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:56:23.513 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:56:23.560 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:56:23.607 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:56:23.655 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:56:23.702 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:56:23.750 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:23.797 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:56:23.844 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:56:23.891 +(APIServer pid=5) INFO 
12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:56:23.938 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:23.986 +(APIServer pid=5) INFO 12-17 13:56:23 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:56:24.033 +(APIServer pid=5) INFO 12-17 13:56:24 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:56:24.080 +(APIServer pid=5) INFO 12-17 13:56:24 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:56:24.128 +(APIServer pid=5) INFO 12-17 13:56:24 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:56:24.175 +(APIServer pid=5) INFO 12-17 13:56:24 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:56:24.223 +(APIServer pid=5) INFO 12-17 13:56:24 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:56:24.270 +(APIServer pid=5) INFO 12-17 13:56:24 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:56:24.317 +(APIServer pid=5) INFO 12-17 13:56:24 [logger.py:76] Generated 
response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:56:24.365 +(APIServer pid=5) INFO 12-17 13:56:24 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:56:24.412 +(APIServer pid=5) INFO 12-17 13:56:24 [logger.py:76] Generated response chatcmpl-3006c14a59b54218b62895e1f9fbf197 (streaming complete): output: "You know that feeling right? Where you talk to an AI, you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:56:24.499 +POST /v1/chat/completions -> 200 OK (duration: 6.32 s, execution: 6.24 s) +Dec 17 16:56:26.228 +(APIServer pid=5) INFO 12-17 13:56:26 [loggers.py:236] Engine 000: Avg prompt throughput: 17.5 tokens/s, Avg generation throughput: 9.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 73.1%, MM cache hit rate: 80.0% +Dec 17 16:56:28.076 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:47] Received request chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. 
(APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:56:28 [async_llm.py:344] Added request chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7. +Dec 17 16:56:28.130 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:56:28.177 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:56:28.226 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:56:28.273 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:56:28.320 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:56:28.368 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' where', output_token_ids: [1380], finish_reason: None +Dec 17 16:56:28.417 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:56:28.464 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:56:28.511 +(APIServer pid=5) INFO 12-17 
13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:56:28.558 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:28.606 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:56:28.653 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:56:28.700 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:56:28.748 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:56:28.795 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:56:28.843 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:56:28.890 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:56:28.937 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated 
response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:56:28.985 +(APIServer pid=5) INFO 12-17 13:56:28 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:56:29.032 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:56:29.079 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:56:29.126 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:56:29.173 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:56:29.221 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:56:29.267 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:56:29.314 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:56:29.362 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response 
chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:56:29.409 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:56:29.457 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:56:29.503 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:56:29.550 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:56:29.598 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:56:29.645 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:56:29.693 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:56:29.739 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:56:29.786 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 
(streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:56:29.834 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:56:29.880 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:56:29.927 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:29.975 +(APIServer pid=5) INFO 12-17 13:56:29 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:56:30.022 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:56:30.070 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:56:30.116 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:56:30.163 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:56:30.211 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' So', 
output_token_ids: [2055], finish_reason: None +Dec 17 16:56:30.257 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:56:30.304 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:56:30.352 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:56:30.399 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:56:30.446 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:30.493 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:56:30.540 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:56:30.587 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:56:30.635 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None 
+Dec 17 16:56:30.681 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:56:30.729 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:56:30.776 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:56:30.823 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:56:30.870 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:56:30.917 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:56:30.964 +(APIServer pid=5) INFO 12-17 13:56:30 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:31.012 +(APIServer pid=5) INFO 12-17 13:56:31 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:56:31.058 +(APIServer pid=5) INFO 12-17 13:56:31 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:56:31.105 +(APIServer pid=5) INFO 
12-17 13:56:31 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:56:31.153 +(APIServer pid=5) INFO 12-17 13:56:31 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:56:31.200 +(APIServer pid=5) INFO 12-17 13:56:31 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:56:31.246 +(APIServer pid=5) INFO 12-17 13:56:31 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:56:31.294 +(APIServer pid=5) INFO 12-17 13:56:31 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:56:31.341 +(APIServer pid=5) INFO 12-17 13:56:31 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:56:31.389 +(APIServer pid=5) INFO 12-17 13:56:31 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:56:31.436 +(APIServer pid=5) INFO 12-17 13:56:31 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:56:31.483 +(APIServer pid=5) INFO 12-17 13:56:31 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:56:31.530 +(APIServer pid=5) INFO 12-17 13:56:31 [logger.py:76] Generated 
response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:56:31.577 +(APIServer pid=5) INFO 12-17 13:56:31 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:56:31.625 +(APIServer pid=5) INFO 12-17 13:56:31 [logger.py:76] Generated response chatcmpl-4c3ddb64d5424534b5aed6af1eccb1c7 (streaming complete): output: "You know that feeling right where you're talking to an AI you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:56:31.761 +POST /v1/chat/completions -> 200 OK (duration: 6.38 s, execution: 6.27 s) +Dec 17 16:56:36.229 +(APIServer pid=5) INFO 12-17 13:56:36 [loggers.py:236] Engine 000: Avg prompt throughput: 17.5 tokens/s, Avg generation throughput: 7.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 76.2%, MM cache hit rate: 83.3% +Dec 17 16:56:46.229 +(APIServer pid=5) INFO 12-17 13:56:46 [loggers.py:236] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 76.2%, MM cache hit rate: 83.3% +Dec 17 16:57:06.922 +(APIServer pid=5) INFO 12-17 13:57:06 [logger.py:47] Received request chatcmpl-dca40b0e47884e47b2a6aa4a994623af: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, 
logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:57:06 [async_llm.py:344] Added request chatcmpl-dca40b0e47884e47b2a6aa4a994623af. +Dec 17 16:57:06.976 +(APIServer pid=5) INFO 12-17 13:57:06 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:57:07.022 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:57:07.069 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:57:07.117 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:57:07.163 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:57:07.211 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:57:07.258 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' Where', output_token_ids: [10967], finish_reason: None +Dec 17 16:57:07.305 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] 
Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:07.352 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:07.399 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:57:07.447 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:07.494 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:57:07.542 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:57:07.588 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ',', output_token_ids: [11], finish_reason: None +Dec 17 16:57:07.635 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:07.683 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:57:07.730 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response 
chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:57:07.777 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:57:07.824 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:57:07.871 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:57:07.918 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:57:07.965 +(APIServer pid=5) INFO 12-17 13:57:07 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:57:08.013 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:57:08.059 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:57:08.107 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:57:08.154 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response 
chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:08.200 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:57:08.248 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:57:08.295 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:57:08.343 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:57:08.389 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:57:08.436 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:57:08.484 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:57:08.531 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:08.577 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af 
(streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:57:08.625 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:08.672 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:57:08.720 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:57:08.767 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:57:08.813 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:57:08.861 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:08.908 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:57:08.956 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:57:09.002 +(APIServer pid=5) INFO 12-17 13:57:08 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' am', 
output_token_ids: [1079], finish_reason: None +Dec 17 16:57:09.049 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:57:09.097 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:09.144 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:57:09.191 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:57:09.238 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:57:09.285 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:09.332 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:57:09.379 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:09.426 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None 
+Dec 17 16:57:09.474 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:57:09.521 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:57:09.568 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:57:09.615 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:57:09.662 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:57:09.709 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:09.756 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:57:09.804 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:09.850 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:57:09.897 +(APIServer pid=5) INFO 12-17 
13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:09.945 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:57:09.992 +(APIServer pid=5) INFO 12-17 13:57:09 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:57:10.039 +(APIServer pid=5) INFO 12-17 13:57:10 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:57:10.086 +(APIServer pid=5) INFO 12-17 13:57:10 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:10.133 +(APIServer pid=5) INFO 12-17 13:57:10 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:57:10.180 +(APIServer pid=5) INFO 12-17 13:57:10 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:57:10.228 +(APIServer pid=5) INFO 12-17 13:57:10 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:57:10.275 +(APIServer pid=5) INFO 12-17 13:57:10 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:57:10.321 +(APIServer pid=5) INFO 12-17 13:57:10 [logger.py:76] Generated response 
chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:57:10.369 +(APIServer pid=5) INFO 12-17 13:57:10 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:57:10.416 +(APIServer pid=5) INFO 12-17 13:57:10 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:57:10.464 +(APIServer pid=5) INFO 12-17 13:57:10 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:57:10.510 +(APIServer pid=5) INFO 12-17 13:57:10 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:10.557 +(APIServer pid=5) INFO 12-17 13:57:10 [logger.py:76] Generated response chatcmpl-dca40b0e47884e47b2a6aa4a994623af (streaming complete): output: "You know that feeling right? Where you're talking to an AI, you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. 
We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:57:10.621 +POST /v1/chat/completions -> 200 OK (duration: 38.1 s, execution: 38.0 s) +Dec 17 16:57:14.621 +(APIServer pid=5) INFO 12-17 13:57:14 [logger.py:47] Received request chatcmpl-3622d4425c614d919e7baee5c8b5ca6c: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:57:14 [async_llm.py:344] Added request chatcmpl-3622d4425c614d919e7baee5c8b5ca6c. 
+Dec 17 16:57:14.675 +(APIServer pid=5) INFO 12-17 13:57:14 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:57:14.722 +(APIServer pid=5) INFO 12-17 13:57:14 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:57:14.770 +(APIServer pid=5) INFO 12-17 13:57:14 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:57:14.817 +(APIServer pid=5) INFO 12-17 13:57:14 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:57:14.863 +(APIServer pid=5) INFO 12-17 13:57:14 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:57:14.911 +(APIServer pid=5) INFO 12-17 13:57:14 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:57:14.958 +(APIServer pid=5) INFO 12-17 13:57:14 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' Where', output_token_ids: [10967], finish_reason: None +Dec 17 16:57:15.005 +(APIServer pid=5) INFO 12-17 13:57:14 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:15.052 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:15.099 +(APIServer pid=5) INFO 
12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:57:15.147 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:15.193 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:57:15.241 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:57:15.288 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ',', output_token_ids: [11], finish_reason: None +Dec 17 16:57:15.334 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:15.382 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:57:15.429 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:57:15.477 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:57:15.523 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response 
chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:57:15.570 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:57:15.618 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:57:15.665 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:57:15.711 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:57:15.759 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:57:15.806 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:57:15.853 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:15.900 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:57:15.947 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response 
chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:57:15.995 +(APIServer pid=5) INFO 12-17 13:57:15 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:57:16.041 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:57:16.089 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:57:16.136 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:57:16.183 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:57:16.231 +(APIServer pid=5) INFO 12-17 13:57:16 [loggers.py:236] Engine 000: Avg prompt throughput: 35.0 tokens/s, Avg generation throughput: 11.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.7%, Prefix cache hit rate: 80.0%, MM cache hit rate: 87.5% (APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:16.278 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:57:16.325 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response 
chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:16.372 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:57:16.419 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:57:16.466 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:57:16.513 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:57:16.560 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:16.608 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:57:16.654 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:57:16.702 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:57:16.749 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c 
(streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:57:16.796 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:16.843 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:57:16.890 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:57:16.937 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:57:16.985 +(APIServer pid=5) INFO 12-17 13:57:16 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:17.032 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' gonna', output_token_ids: [16519], finish_reason: None +Dec 17 16:57:17.079 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:57:17.126 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:57:17.173 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' how', 
output_token_ids: [1246], finish_reason: None +Dec 17 16:57:17.220 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:57:17.267 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:57:17.314 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:57:17.362 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:17.408 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:57:17.456 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:17.503 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:57:17.550 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:17.597 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 
17 16:57:17.644 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:57:17.691 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:57:17.739 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:17.785 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:57:17.833 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:57:17.880 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:57:17.926 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:57:17.974 +(APIServer pid=5) INFO 12-17 13:57:17 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:57:18.021 +(APIServer pid=5) INFO 12-17 13:57:18 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:57:18.069 +(APIServer pid=5) INFO 
12-17 13:57:18 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:57:18.115 +(APIServer pid=5) INFO 12-17 13:57:18 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:57:18.162 +(APIServer pid=5) INFO 12-17 13:57:18 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:18.210 +(APIServer pid=5) INFO 12-17 13:57:18 [logger.py:76] Generated response chatcmpl-3622d4425c614d919e7baee5c8b5ca6c (streaming complete): output: "You know that feeling right? Where you're talking to an AI, you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're gonna dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:57:18.318 +POST /v1/chat/completions -> 200 OK (duration: 6.84 s, execution: 6.72 s) +Dec 17 16:57:21.380 +(APIServer pid=5) INFO 12-17 13:57:21 [logger.py:47] Received request chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. 
(APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:57:21 [async_llm.py:344] Added request chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178. +Dec 17 16:57:21.433 +(APIServer pid=5) INFO 12-17 13:57:21 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:57:21.481 +(APIServer pid=5) INFO 12-17 13:57:21 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:57:21.527 +(APIServer pid=5) INFO 12-17 13:57:21 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:57:21.574 +(APIServer pid=5) INFO 12-17 13:57:21 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:57:21.622 +(APIServer pid=5) INFO 12-17 13:57:21 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:57:21.669 +(APIServer pid=5) INFO 12-17 13:57:21 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' where', output_token_ids: [1380], finish_reason: None +Dec 17 16:57:21.715 +(APIServer pid=5) INFO 12-17 13:57:21 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:21.763 +(APIServer pid=5) INFO 12-17 13:57:21 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:21.810 +(APIServer pid=5) INFO 12-17 
13:57:21 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:57:21.856 +(APIServer pid=5) INFO 12-17 13:57:21 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:21.904 +(APIServer pid=5) INFO 12-17 13:57:21 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:57:21.951 +(APIServer pid=5) INFO 12-17 13:57:21 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:57:21.999 +(APIServer pid=5) INFO 12-17 13:57:21 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ',', output_token_ids: [11], finish_reason: None +Dec 17 16:57:22.045 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:22.092 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:57:22.140 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:57:22.187 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:57:22.233 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response 
chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:57:22.281 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:57:22.328 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:57:22.376 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:57:22.422 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:57:22.469 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:57:22.517 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:57:22.564 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:22.610 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:57:22.658 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response 
chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:57:22.705 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:57:22.752 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:57:22.799 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:57:22.847 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:57:22.893 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:57:22.940 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:22.988 +(APIServer pid=5) INFO 12-17 13:57:22 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:57:23.035 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:23.082 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 
(streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:57:23.129 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:57:23.176 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:57:23.224 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:57:23.270 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:23.317 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:57:23.365 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:57:23.412 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:57:23.460 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:57:23.507 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: '.', 
output_token_ids: [13], finish_reason: None +Dec 17 16:57:23.553 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:57:23.601 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:57:23.648 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:57:23.695 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:23.742 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:57:23.790 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:23.837 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:57:23.883 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:57:23.931 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None 
+Dec 17 16:57:23.978 +(APIServer pid=5) INFO 12-17 13:57:23 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:57:24.025 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:57:24.072 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:57:24.119 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:24.166 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:57:24.213 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:24.260 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:57:24.308 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:24.354 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:57:24.401 +(APIServer pid=5) INFO 12-17 
13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:57:24.449 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:57:24.496 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:24.542 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:57:24.590 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:57:24.637 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:57:24.684 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:57:24.731 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:57:24.778 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:57:24.826 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response 
chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:57:24.873 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:57:24.920 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:24.968 +(APIServer pid=5) INFO 12-17 13:57:24 [logger.py:76] Generated response chatcmpl-6f090bc5e23741b5bd5c3f78f1f9b178 (streaming complete): output: "You know that feeling right where you're talking to an AI, you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:57:25.067 +POST /v1/chat/completions -> 200 OK (duration: 5.94 s, execution: 5.86 s) +Dec 17 16:57:26.231 +(APIServer pid=5) INFO 12-17 13:57:26 [loggers.py:236] Engine 000: Avg prompt throughput: 17.5 tokens/s, Avg generation throughput: 11.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 81.3%, MM cache hit rate: 88.9% +Dec 17 16:57:28.986 +(APIServer pid=5) INFO 12-17 13:57:28 [logger.py:47] Received request chatcmpl-0d64343c2236496a99e1ed300365a690: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, 
spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:57:28 [async_llm.py:344] Added request chatcmpl-0d64343c2236496a99e1ed300365a690. +Dec 17 16:57:29.039 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:57:29.087 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:57:29.134 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:57:29.180 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:57:29.228 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:57:29.275 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:57:29.322 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' Where', output_token_ids: [10967], finish_reason: None +Dec 17 16:57:29.369 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 
(streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:29.416 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:57:29.464 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:29.511 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:57:29.557 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:57:29.605 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ',', output_token_ids: [11], finish_reason: None +Dec 17 16:57:29.652 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:29.699 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:57:29.746 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:57:29.793 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' amazing', output_token_ids: 
[7897], finish_reason: None +Dec 17 16:57:29.841 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:57:29.887 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:57:29.935 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:57:29.982 +(APIServer pid=5) INFO 12-17 13:57:29 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:57:30.028 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:57:30.076 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:57:30.123 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:57:30.170 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:30.217 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None 
+Dec 17 16:57:30.264 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:57:30.312 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:57:30.359 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:57:30.405 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:57:30.453 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:57:30.500 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:57:30.547 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:30.594 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:57:30.641 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:30.689 +(APIServer pid=5) INFO 12-17 
13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:57:30.735 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:57:30.782 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:57:30.830 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:57:30.877 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:30.923 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:57:30.971 +(APIServer pid=5) INFO 12-17 13:57:30 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:57:31.018 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:57:31.064 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:57:31.112 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response 
chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:31.159 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:57:31.206 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:57:31.253 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:57:31.300 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:31.348 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:57:31.395 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:31.441 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:57:31.489 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:57:31.536 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 
(streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:57:31.584 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:57:31.630 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:57:31.677 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:57:31.725 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:31.772 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:57:31.819 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:31.866 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:57:31.913 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:31.960 +(APIServer pid=5) INFO 12-17 13:57:31 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' talk', output_token_ids: 
[3061], finish_reason: None +Dec 17 16:57:32.007 +(APIServer pid=5) INFO 12-17 13:57:32 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:57:32.055 +(APIServer pid=5) INFO 12-17 13:57:32 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:57:32.102 +(APIServer pid=5) INFO 12-17 13:57:32 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:32.148 +(APIServer pid=5) INFO 12-17 13:57:32 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:57:32.196 +(APIServer pid=5) INFO 12-17 13:57:32 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:57:32.243 +(APIServer pid=5) INFO 12-17 13:57:32 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:57:32.290 +(APIServer pid=5) INFO 12-17 13:57:32 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:57:32.339 +(APIServer pid=5) INFO 12-17 13:57:32 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:57:32.386 +(APIServer pid=5) INFO 12-17 13:57:32 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 
16:57:32.433 +(APIServer pid=5) INFO 12-17 13:57:32 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:57:32.480 +(APIServer pid=5) INFO 12-17 13:57:32 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:57:32.527 +(APIServer pid=5) INFO 12-17 13:57:32 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:32.575 +(APIServer pid=5) INFO 12-17 13:57:32 [logger.py:76] Generated response chatcmpl-0d64343c2236496a99e1ed300365a690 (streaming complete): output: "You know that feeling right? Where you talk to an AI, you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:57:32.650 +POST /v1/chat/completions -> 200 OK (duration: 6.69 s, execution: 6.60 s) +Dec 17 16:57:36.108 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:47] Received request chatcmpl-78a1f84734384859989715e46b235e6e: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. 
(APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:57:36 [async_llm.py:344] Added request chatcmpl-78a1f84734384859989715e46b235e6e. +Dec 17 16:57:36.162 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:57:36.209 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:57:36.232 +(APIServer pid=5) INFO 12-17 13:57:36 [loggers.py:236] Engine 000: Avg prompt throughput: 35.0 tokens/s, Avg generation throughput: 7.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.6%, Prefix cache hit rate: 83.1%, MM cache hit rate: 90.9% +Dec 17 16:57:36.257 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:57:36.305 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:57:36.352 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:57:36.400 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' where', output_token_ids: [1380], finish_reason: None +Dec 17 16:57:36.447 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' you', output_token_ids: [498], 
finish_reason: None +Dec 17 16:57:36.495 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:36.541 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:57:36.588 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:36.636 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:57:36.683 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:57:36.731 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:36.777 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:57:36.824 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:57:36.872 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:57:36.918 
+(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:57:36.966 +(APIServer pid=5) INFO 12-17 13:57:36 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:57:37.013 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:57:37.060 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:57:37.107 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:57:37.154 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:57:37.201 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:57:37.249 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:37.295 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:57:37.342 +(APIServer pid=5) INFO 12-17 
13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:57:37.390 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:57:37.436 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:57:37.483 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:57:37.531 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:57:37.577 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:57:37.625 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:37.672 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:57:37.719 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:37.766 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response 
chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:57:37.813 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:57:37.860 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:57:37.908 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:57:37.955 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:38.003 +(APIServer pid=5) INFO 12-17 13:57:37 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:57:38.050 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:57:38.096 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:57:38.144 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:57:38.191 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e 
(streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:38.240 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:57:38.287 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:57:38.335 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:57:38.383 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:38.430 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:57:38.478 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:38.525 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:57:38.572 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:57:38.620 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' how', 
output_token_ids: [1246], finish_reason: None +Dec 17 16:57:38.667 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:57:38.714 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:57:38.762 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:57:38.809 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:38.858 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:57:38.905 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:38.952 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:57:39.000 +(APIServer pid=5) INFO 12-17 13:57:38 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:39.047 +(APIServer pid=5) INFO 12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 
17 16:57:39.093 +(APIServer pid=5) INFO 12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:57:39.141 +(APIServer pid=5) INFO 12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:57:39.188 +(APIServer pid=5) INFO 12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:39.235 +(APIServer pid=5) INFO 12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:57:39.283 +(APIServer pid=5) INFO 12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:57:39.330 +(APIServer pid=5) INFO 12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:57:39.378 +(APIServer pid=5) INFO 12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:57:39.425 +(APIServer pid=5) INFO 12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:57:39.472 +(APIServer pid=5) INFO 12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:57:39.521 +(APIServer pid=5) INFO 
12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:57:39.568 +(APIServer pid=5) INFO 12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:57:39.615 +(APIServer pid=5) INFO 12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:39.744 +POST /v1/chat/completions -> 200 OK (duration: 6.29 s, execution: 6.15 s) +Dec 17 16:57:39.663 +(APIServer pid=5) INFO 12-17 13:57:39 [logger.py:76] Generated response chatcmpl-78a1f84734384859989715e46b235e6e (streaming complete): output: "You know that feeling right where you're talking to an AI you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:57:43.565 +(APIServer pid=5) INFO 12-17 13:57:43 [logger.py:47] Received request chatcmpl-ad05022250124ab49ec9a2962f3eacd0: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. 
(APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:57:43 [async_llm.py:344] Added request chatcmpl-ad05022250124ab49ec9a2962f3eacd0. +Dec 17 16:57:43.618 +(APIServer pid=5) INFO 12-17 13:57:43 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:57:43.665 +(APIServer pid=5) INFO 12-17 13:57:43 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:57:43.713 +(APIServer pid=5) INFO 12-17 13:57:43 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:57:43.760 +(APIServer pid=5) INFO 12-17 13:57:43 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:57:43.806 +(APIServer pid=5) INFO 12-17 13:57:43 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:57:43.854 +(APIServer pid=5) INFO 12-17 13:57:43 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' where', output_token_ids: [1380], finish_reason: None +Dec 17 16:57:43.901 +(APIServer pid=5) INFO 12-17 13:57:43 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:43.948 +(APIServer pid=5) INFO 12-17 13:57:43 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:57:43.996 +(APIServer pid=5) INFO 12-17 
13:57:43 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:44.042 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:57:44.089 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:57:44.137 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ',', output_token_ids: [11], finish_reason: None +Dec 17 16:57:44.183 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:44.231 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:57:44.278 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:57:44.324 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:57:44.372 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:57:44.419 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response 
chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:57:44.465 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:57:44.513 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:57:44.560 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:57:44.606 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:57:44.654 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:57:44.701 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:44.749 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:57:44.795 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:57:44.842 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response 
chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:57:44.890 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:57:44.936 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:57:44.983 +(APIServer pid=5) INFO 12-17 13:57:44 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:57:45.031 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:57:45.077 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:57:45.125 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:57:45.172 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:45.218 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:57:45.266 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 
(streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:57:45.313 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:57:45.360 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:57:45.407 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:45.454 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:57:45.501 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:57:45.548 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:57:45.595 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:57:45.643 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:45.689 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' So', 
output_token_ids: [2055], finish_reason: None +Dec 17 16:57:45.736 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:57:45.784 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:57:45.830 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:45.878 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:57:45.925 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:45.971 +(APIServer pid=5) INFO 12-17 13:57:45 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:57:46.019 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:57:46.066 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:57:46.113 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None 
+Dec 17 16:57:46.160 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:57:46.207 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:57:46.233 +(APIServer pid=5) INFO 12-17 13:57:46 [loggers.py:236] Engine 000: Avg prompt throughput: 17.5 tokens/s, Avg generation throughput: 12.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, Prefix cache hit rate: 83.8%, MM cache hit rate: 91.7% +Dec 17 16:57:46.254 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:46.301 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:57:46.348 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:57:46.396 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:57:46.442 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:46.490 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' talk', output_token_ids: [3061], 
finish_reason: None +Dec 17 16:57:46.537 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:57:46.583 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:57:46.631 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:57:46.678 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:57:46.725 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:57:46.772 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:57:46.819 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:57:46.867 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:57:46.913 +(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:57:46.960 
+(APIServer pid=5) INFO 12-17 13:57:46 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:57:47.008 +(APIServer pid=5) INFO 12-17 13:57:47 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:57:47.055 +(APIServer pid=5) INFO 12-17 13:57:47 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:57:47.102 +(APIServer pid=5) INFO 12-17 13:57:47 [logger.py:76] Generated response chatcmpl-ad05022250124ab49ec9a2962f3eacd0 (streaming complete): output: "You know that feeling right where you talk to an AI, you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. 
We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:57:47.159 +POST /v1/chat/completions -> 200 OK (duration: 6.58 s, execution: 6.51 s) +Dec 17 16:57:56.233 +(APIServer pid=5) INFO 12-17 13:57:56 [loggers.py:236] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 1.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 83.8%, MM cache hit rate: 91.7% +Dec 17 16:58:06.235 +(APIServer pid=5) INFO 12-17 13:58:06 [loggers.py:236] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 83.8%, MM cache hit rate: 91.7% +Dec 17 16:58:24.471 +(APIServer pid=5) INFO 12-17 13:58:24 [logger.py:47] Received request chatcmpl-df13b706c9c940d18e33bb9c022987a7: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:58:24 [async_llm.py:344] Added request chatcmpl-df13b706c9c940d18e33bb9c022987a7. 
+Dec 17 16:58:24.525 +(APIServer pid=5) INFO 12-17 13:58:24 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:58:24.573 +(APIServer pid=5) INFO 12-17 13:58:24 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:58:24.620 +(APIServer pid=5) INFO 12-17 13:58:24 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:58:24.666 +(APIServer pid=5) INFO 12-17 13:58:24 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:58:24.714 +(APIServer pid=5) INFO 12-17 13:58:24 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:58:24.761 +(APIServer pid=5) INFO 12-17 13:58:24 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' where', output_token_ids: [1380], finish_reason: None +Dec 17 16:58:24.809 +(APIServer pid=5) INFO 12-17 13:58:24 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:24.856 +(APIServer pid=5) INFO 12-17 13:58:24 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:58:24.903 +(APIServer pid=5) INFO 12-17 13:58:24 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:58:24.950 +(APIServer 
pid=5) INFO 12-17 13:58:24 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:24.997 +(APIServer pid=5) INFO 12-17 13:58:24 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:58:25.045 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:58:25.092 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ',', output_token_ids: [11], finish_reason: None +Dec 17 16:58:25.139 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:25.187 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:58:25.234 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:58:25.280 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:58:25.328 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:58:25.375 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] 
Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:58:25.423 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:58:25.470 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:58:25.516 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:58:25.564 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:58:25.611 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:58:25.658 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:25.705 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:58:25.752 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:58:25.799 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response 
chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:58:25.846 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:58:25.894 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:58:25.940 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:58:25.987 +(APIServer pid=5) INFO 12-17 13:58:25 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:58:26.035 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:26.081 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:58:26.129 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:26.176 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:58:26.222 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 
(streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:58:26.237 +(APIServer pid=5) INFO 12-17 13:58:26 [loggers.py:236] Engine 000: Avg prompt throughput: 17.5 tokens/s, Avg generation throughput: 3.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.7%, Prefix cache hit rate: 84.4%, MM cache hit rate: 92.3% +Dec 17 16:58:26.270 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:58:26.317 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:58:26.365 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:26.411 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:58:26.458 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:58:26.506 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:58:26.552 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:58:26.600 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response 
chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:26.647 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:58:26.694 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:58:26.741 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:58:26.788 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:58:26.836 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:58:26.882 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:26.929 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:58:26.977 +(APIServer pid=5) INFO 12-17 13:58:26 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:58:27.023 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 
(streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:58:27.070 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:58:27.118 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:58:27.165 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:58:27.212 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:27.259 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:58:27.306 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:58:27.353 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:58:27.400 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:27.448 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' talk', output_token_ids: 
[3061], finish_reason: None +Dec 17 16:58:27.495 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:58:27.541 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:58:27.589 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:27.636 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:58:27.682 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:58:27.730 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:58:27.777 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:58:27.826 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:58:27.873 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 
16:58:27.919 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:58:27.967 +(APIServer pid=5) INFO 12-17 13:58:27 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:58:28.014 +(APIServer pid=5) INFO 12-17 13:58:28 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:28.062 +(APIServer pid=5) INFO 12-17 13:58:28 [logger.py:76] Generated response chatcmpl-df13b706c9c940d18e33bb9c022987a7 (streaming complete): output: "You know that feeling right where you're talking to an AI, you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:58:28.159 +POST /v1/chat/completions -> 200 OK (duration: 40.1 s, execution: 40.0 s) +Dec 17 16:58:34.169 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:47] Received request chatcmpl-ee8747afc8544815b912b298bfbf400a: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. 
(APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:58:34 [async_llm.py:344] Added request chatcmpl-ee8747afc8544815b912b298bfbf400a. +Dec 17 16:58:34.223 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:58:34.270 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:58:34.316 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:58:34.364 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:58:34.411 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:58:34.459 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' where', output_token_ids: [1380], finish_reason: None +Dec 17 16:58:34.505 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:34.552 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:58:34.600 +(APIServer pid=5) INFO 12-17 
13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:58:34.646 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:34.693 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:58:34.741 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:58:34.788 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:34.834 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:58:34.882 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:58:34.929 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:58:34.975 +(APIServer pid=5) INFO 12-17 13:58:34 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:58:35.023 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated 
response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:58:35.070 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:58:35.117 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:58:35.164 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:58:35.211 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:58:35.259 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:58:35.305 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:35.352 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:58:35.400 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:58:35.446 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response 
chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:58:35.494 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:58:35.541 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:58:35.588 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:58:35.635 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:58:35.682 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:35.730 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:58:35.776 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:35.823 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:58:35.871 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a 
(streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:58:35.918 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:58:35.964 +(APIServer pid=5) INFO 12-17 13:58:35 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:58:36.012 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:36.059 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:58:36.106 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:58:36.153 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:58:36.200 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:58:36.238 +(APIServer pid=5) INFO 12-17 13:58:36 [loggers.py:236] Engine 000: Avg prompt throughput: 17.5 tokens/s, Avg generation throughput: 8.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.7%, Prefix cache hit rate: 84.9%, MM cache hit rate: 92.9% +Dec 17 16:58:36.248 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response 
chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:36.295 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:58:36.341 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:58:36.389 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:58:36.436 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:58:36.482 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' gonna', output_token_ids: [16519], finish_reason: None +Dec 17 16:58:36.530 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:58:36.577 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:58:36.623 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:58:36.671 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a 
(streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:58:36.718 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:58:36.766 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:58:36.812 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:36.859 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:58:36.907 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:58:36.953 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:58:37.001 +(APIServer pid=5) INFO 12-17 13:58:36 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:37.047 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:58:37.094 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' about', 
output_token_ids: [911], finish_reason: None +Dec 17 16:58:37.142 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:58:37.189 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:37.235 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:58:37.283 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:58:37.330 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:58:37.377 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:58:37.424 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:58:37.471 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:58:37.519 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: 
None +Dec 17 16:58:37.566 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:58:37.612 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:37.660 +(APIServer pid=5) INFO 12-17 13:58:37 [logger.py:76] Generated response chatcmpl-ee8747afc8544815b912b298bfbf400a (streaming complete): output: "You know that feeling right where you're talking to an AI you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're gonna dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:58:37.716 +POST /v1/chat/completions -> 200 OK (duration: 8.79 s, execution: 8.68 s) +Dec 17 16:58:41.496 +(APIServer pid=5) INFO 12-17 13:58:41 [logger.py:47] Received request chatcmpl-f665a299b24145728ffeeb8302df1436: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:58:41 [async_llm.py:344] Added request chatcmpl-f665a299b24145728ffeeb8302df1436. 
+Dec 17 16:58:41.548 +(APIServer pid=5) INFO 12-17 13:58:41 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:58:41.596 +(APIServer pid=5) INFO 12-17 13:58:41 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:58:41.643 +(APIServer pid=5) INFO 12-17 13:58:41 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:58:41.690 +(APIServer pid=5) INFO 12-17 13:58:41 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:58:41.737 +(APIServer pid=5) INFO 12-17 13:58:41 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:58:41.784 +(APIServer pid=5) INFO 12-17 13:58:41 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' where', output_token_ids: [1380], finish_reason: None +Dec 17 16:58:41.832 +(APIServer pid=5) INFO 12-17 13:58:41 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:41.878 +(APIServer pid=5) INFO 12-17 13:58:41 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:58:41.925 +(APIServer pid=5) INFO 12-17 13:58:41 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:58:41.973 +(APIServer 
pid=5) INFO 12-17 13:58:41 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:42.019 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:58:42.066 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:58:42.114 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ',', output_token_ids: [11], finish_reason: None +Dec 17 16:58:42.160 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:42.207 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:58:42.255 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:58:42.301 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:58:42.349 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:58:42.396 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] 
Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:58:42.442 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:58:42.490 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:58:42.537 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:58:42.584 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:58:42.631 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:58:42.678 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:42.726 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:58:42.772 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:58:42.819 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response 
chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:58:42.867 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:58:42.913 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:58:42.961 +(APIServer pid=5) INFO 12-17 13:58:42 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:58:43.008 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:58:43.054 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:43.102 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:58:43.149 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:43.196 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:58:43.243 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 
(streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:58:43.290 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:58:43.337 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:58:43.384 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:43.432 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:58:43.478 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:58:43.525 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:58:43.573 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:58:43.619 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:43.666 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' So', 
output_token_ids: [2055], finish_reason: None +Dec 17 16:58:43.714 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:58:43.760 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:58:43.808 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:58:43.855 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:58:43.902 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:43.949 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:58:43.996 +(APIServer pid=5) INFO 12-17 13:58:43 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:58:44.043 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:58:44.091 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None 
+Dec 17 16:58:44.137 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:58:44.185 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:58:44.232 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:44.278 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:58:44.326 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:58:44.373 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:58:44.420 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:44.467 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:58:44.514 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:58:44.561 +(APIServer pid=5) INFO 
12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:58:44.608 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:44.655 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:58:44.703 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:58:44.749 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:58:44.796 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:58:44.844 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:58:44.890 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:58:44.937 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:58:44.985 +(APIServer pid=5) INFO 12-17 13:58:44 [logger.py:76] Generated 
response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:58:45.031 +(APIServer pid=5) INFO 12-17 13:58:45 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:45.080 +(APIServer pid=5) INFO 12-17 13:58:45 [logger.py:76] Generated response chatcmpl-f665a299b24145728ffeeb8302df1436 (streaming complete): output: "You know that feeling right where you are talking to an AI, you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we are going to dive into how we fixed that. We are going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:58:45.154 +POST /v1/chat/completions -> 200 OK (duration: 6.62 s, execution: 6.54 s) +Dec 17 16:58:46.238 +(APIServer pid=5) INFO 12-17 13:58:46 [loggers.py:236] Engine 000: Avg prompt throughput: 17.5 tokens/s, Avg generation throughput: 10.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 85.3%, MM cache hit rate: 93.3% +Dec 17 16:58:49.033 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:47] Received request chatcmpl-40d80710ec3745b994acaba3a0c0948d: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. 
(APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:58:49 [async_llm.py:344] Added request chatcmpl-40d80710ec3745b994acaba3a0c0948d. +Dec 17 16:58:49.086 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:58:49.134 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:58:49.181 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:58:49.227 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:58:49.275 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:58:49.322 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' where', output_token_ids: [1380], finish_reason: None +Dec 17 16:58:49.370 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:49.416 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:58:49.463 +(APIServer pid=5) INFO 12-17 
13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:58:49.511 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:49.558 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:58:49.604 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:58:49.652 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:49.699 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:58:49.746 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:58:49.793 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:58:49.840 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:58:49.887 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated 
response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:58:49.934 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:58:49.982 +(APIServer pid=5) INFO 12-17 13:58:49 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:58:50.028 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:58:50.075 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:58:50.123 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:58:50.170 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:50.216 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:58:50.264 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:58:50.311 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response 
chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:58:50.357 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:58:50.405 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:58:50.452 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:58:50.500 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:58:50.546 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:50.593 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:58:50.641 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:50.687 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:58:50.734 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d 
(streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:58:50.782 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:58:50.828 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:58:50.876 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:50.923 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:58:50.969 +(APIServer pid=5) INFO 12-17 13:58:50 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:58:51.017 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:58:51.064 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:58:51.111 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:51.158 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' So', 
output_token_ids: [2055], finish_reason: None +Dec 17 16:58:51.205 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:58:51.252 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:58:51.299 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:58:51.346 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:58:51.394 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:51.440 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:58:51.488 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:58:51.534 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:58:51.581 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None 
+Dec 17 16:58:51.629 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:58:51.676 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:58:51.722 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:51.770 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:58:51.817 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:58:51.864 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:58:51.911 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:51.958 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:58:52.005 +(APIServer pid=5) INFO 12-17 13:58:51 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:58:52.052 +(APIServer pid=5) INFO 
12-17 13:58:52 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:58:52.100 +(APIServer pid=5) INFO 12-17 13:58:52 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:52.146 +(APIServer pid=5) INFO 12-17 13:58:52 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:58:52.193 +(APIServer pid=5) INFO 12-17 13:58:52 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:58:52.240 +(APIServer pid=5) INFO 12-17 13:58:52 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:58:52.287 +(APIServer pid=5) INFO 12-17 13:58:52 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:58:52.335 +(APIServer pid=5) INFO 12-17 13:58:52 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:58:52.382 +(APIServer pid=5) INFO 12-17 13:58:52 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:58:52.428 +(APIServer pid=5) INFO 12-17 13:58:52 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:58:52.476 +(APIServer pid=5) INFO 12-17 13:58:52 [logger.py:76] Generated 
response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:58:52.522 +(APIServer pid=5) INFO 12-17 13:58:52 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:52.570 +(APIServer pid=5) INFO 12-17 13:58:52 [logger.py:76] Generated response chatcmpl-40d80710ec3745b994acaba3a0c0948d (streaming complete): output: "You know that feeling right where you are talking to an AI you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:58:52.634 +POST /v1/chat/completions -> 200 OK (duration: 6.59 s, execution: 6.48 s) +Dec 17 16:58:56.196 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:47] Received request chatcmpl-1badf7acf8f84d03a02ab3ec86a46394: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:58:56 [async_llm.py:344] Added request chatcmpl-1badf7acf8f84d03a02ab3ec86a46394. 
+Dec 17 16:58:56.239 +(APIServer pid=5) INFO 12-17 13:58:56 [loggers.py:236] Engine 000: Avg prompt throughput: 17.5 tokens/s, Avg generation throughput: 7.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 85.7%, MM cache hit rate: 93.8% +Dec 17 16:58:56.250 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:58:56.296 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:58:56.343 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:58:56.391 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:58:56.437 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:58:56.485 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' where', output_token_ids: [1380], finish_reason: None +Dec 17 16:58:56.532 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:56.578 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: "'re", output_token_ids: 
[2299], finish_reason: None +Dec 17 16:58:56.626 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:58:56.673 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:56.719 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:58:56.767 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:58:56.814 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ',', output_token_ids: [11], finish_reason: None +Dec 17 16:58:56.860 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:56.908 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:58:56.955 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:58:57.003 +(APIServer pid=5) INFO 12-17 13:58:56 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:58:57.049 
+(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:58:57.096 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:58:57.144 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:58:57.190 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:58:57.238 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:58:57.284 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:58:57.331 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:58:57.379 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:57.426 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:58:57.472 +(APIServer pid=5) INFO 12-17 
13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:58:57.520 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:58:57.567 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:58:57.615 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:58:57.661 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:58:57.708 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:58:57.756 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:58:57.802 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:58:57.849 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:57.897 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response 
chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:58:57.943 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:58:57.990 +(APIServer pid=5) INFO 12-17 13:58:57 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:58:58.038 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:58:58.084 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:58.132 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:58:58.179 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:58:58.226 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:58:58.273 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:58:58.320 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 
(streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:58.368 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:58:58.415 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:58:58.461 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:58:58.509 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:58:58.556 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:58:58.603 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:58.650 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:58:58.697 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:58:58.745 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' how', 
output_token_ids: [1246], finish_reason: None +Dec 17 16:58:58.791 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:58:58.838 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:58:58.886 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:58:58.932 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:58.980 +(APIServer pid=5) INFO 12-17 13:58:58 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:58:59.027 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:58:59.073 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:58:59.121 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:59.168 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 
17 16:58:59.215 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:58:59.262 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:58:59.309 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:58:59.357 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:58:59.403 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:58:59.450 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:58:59.498 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:58:59.544 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:58:59.591 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:58:59.639 +(APIServer pid=5) INFO 
12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:58:59.685 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:58:59.733 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:58:59.780 +(APIServer pid=5) INFO 12-17 13:58:59 [logger.py:76] Generated response chatcmpl-1badf7acf8f84d03a02ab3ec86a46394 (streaming complete): output: "You know that feeling right where you're talking to an AI, you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:58:59.841 +POST /v1/chat/completions -> 200 OK (duration: 6.10 s, execution: 6.00 s) +Dec 17 16:59:03.363 +(APIServer pid=5) INFO 12-17 13:59:03 [logger.py:47] Received request chatcmpl-7104a022f6814bc69aef32b61eb90e42: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. 
(APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:59:03 [async_llm.py:344] Added request chatcmpl-7104a022f6814bc69aef32b61eb90e42. +Dec 17 16:59:03.417 +(APIServer pid=5) INFO 12-17 13:59:03 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:59:03.464 +(APIServer pid=5) INFO 12-17 13:59:03 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:59:03.512 +(APIServer pid=5) INFO 12-17 13:59:03 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:59:03.558 +(APIServer pid=5) INFO 12-17 13:59:03 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:59:03.605 +(APIServer pid=5) INFO 12-17 13:59:03 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:59:03.653 +(APIServer pid=5) INFO 12-17 13:59:03 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' where', output_token_ids: [1380], finish_reason: None +Dec 17 16:59:03.700 +(APIServer pid=5) INFO 12-17 13:59:03 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:59:03.747 +(APIServer pid=5) INFO 12-17 13:59:03 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:59:03.794 +(APIServer pid=5) INFO 12-17 
13:59:03 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:59:03.841 +(APIServer pid=5) INFO 12-17 13:59:03 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:03.888 +(APIServer pid=5) INFO 12-17 13:59:03 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:59:03.935 +(APIServer pid=5) INFO 12-17 13:59:03 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:59:03.982 +(APIServer pid=5) INFO 12-17 13:59:03 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ',', output_token_ids: [11], finish_reason: None +Dec 17 16:59:04.029 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:59:04.076 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:59:04.124 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:59:04.170 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:59:04.217 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response 
chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:59:04.265 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:59:04.312 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:59:04.360 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:59:04.406 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:59:04.453 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:59:04.501 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:59:04.548 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:59:04.594 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:59:04.642 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response 
chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:59:04.689 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:59:04.737 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:59:04.783 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:59:04.830 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:59:04.878 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:59:04.924 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:59:04.971 +(APIServer pid=5) INFO 12-17 13:59:04 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:59:05.019 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:59:05.066 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 
(streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:59:05.113 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:59:05.160 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:59:05.207 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:59:05.255 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:05.301 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:59:05.348 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:59:05.396 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:59:05.442 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:59:05.489 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: '.', 
output_token_ids: [13], finish_reason: None +Dec 17 16:59:05.537 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:59:05.584 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:59:05.630 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:59:05.678 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:59:05.725 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' gonna', output_token_ids: [16519], finish_reason: None +Dec 17 16:59:05.772 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:59:05.819 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:59:05.866 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:59:05.913 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None 
+Dec 17 16:59:05.960 +(APIServer pid=5) INFO 12-17 13:59:05 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:59:06.007 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:59:06.055 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:59:06.101 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:59:06.148 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:59:06.196 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:59:06.240 +(APIServer pid=5) INFO 12-17 13:59:06 [loggers.py:236] Engine 000: Avg prompt throughput: 35.0 tokens/s, Avg generation throughput: 13.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, Prefix cache hit rate: 86.3%, MM cache hit rate: 94.4% (APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:06.289 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 
17 16:59:06.337 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:59:06.383 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:59:06.430 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:06.478 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:59:06.524 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:59:06.572 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:59:06.619 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:59:06.666 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:59:06.713 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:59:06.760 +(APIServer pid=5) INFO 
12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:59:06.807 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:59:06.854 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:59:06.901 +(APIServer pid=5) INFO 12-17 13:59:06 [logger.py:76] Generated response chatcmpl-7104a022f6814bc69aef32b61eb90e42 (streaming complete): output: "You know that feeling right where you're talking to an AI, you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're gonna dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:59:07.023 +POST /v1/chat/completions -> 200 OK (duration: 6.38 s, execution: 6.29 s) +Dec 17 16:59:10.335 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:47] Received request chatcmpl-a1e6318d933544eba5802e3c15741543: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. 
(APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:59:10 [async_llm.py:344] Added request chatcmpl-a1e6318d933544eba5802e3c15741543. +Dec 17 16:59:10.387 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:59:10.435 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:59:10.482 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:59:10.528 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:59:10.576 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:59:10.623 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:59:10.671 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' Where', output_token_ids: [10967], finish_reason: None +Dec 17 16:59:10.717 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:59:10.764 +(APIServer pid=5) INFO 12-17 
13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:59:10.812 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:59:10.858 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:10.905 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:59:10.953 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:59:10.999 +(APIServer pid=5) INFO 12-17 13:59:10 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:59:11.047 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:59:11.094 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:59:11.141 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:59:11.188 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response 
chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' detailed', output_token_ids: [11682], finish_reason: None +Dec 17 16:59:11.235 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:59:11.282 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:59:11.329 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:59:11.376 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:59:11.423 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:59:11.471 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:59:11.517 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:59:11.565 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:59:11.612 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response 
chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' it', output_token_ids: [432], finish_reason: None +Dec 17 16:59:11.659 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:59:11.706 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:59:11.753 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:59:11.801 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:59:11.847 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:59:11.894 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:59:11.942 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:59:11.988 +(APIServer pid=5) INFO 12-17 13:59:11 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:59:12.036 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 
(streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:59:12.083 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:59:12.129 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:59:12.177 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:59:12.224 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:12.270 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:59:12.318 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:59:12.365 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:59:12.412 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:59:12.459 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: '.', 
output_token_ids: [13], finish_reason: None +Dec 17 16:59:12.506 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:59:12.554 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:59:12.600 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:59:12.647 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:59:12.695 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:59:12.741 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:12.789 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:59:12.836 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:59:12.882 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None 
+Dec 17 16:59:12.930 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:59:12.977 +(APIServer pid=5) INFO 12-17 13:59:12 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:59:13.024 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:59:13.071 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:59:13.118 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:59:13.165 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:59:13.212 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:59:13.260 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:13.306 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:59:13.354 +(APIServer pid=5) INFO 12-17 
13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:59:13.401 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:59:13.448 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:13.495 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:59:13.542 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:59:13.590 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:59:13.636 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:59:13.684 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:59:13.731 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:59:13.778 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response 
chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' working', output_token_ids: [3238], finish_reason: None +Dec 17 16:59:13.824 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:59:13.872 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:59:13.919 +(APIServer pid=5) INFO 12-17 13:59:13 [logger.py:76] Generated response chatcmpl-a1e6318d933544eba5802e3c15741543 (streaming complete): output: "You know that feeling right? Where you're talking to an AI you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:59:14.033 +POST /v1/chat/completions -> 200 OK (duration: 6.26 s, execution: 6.15 s) +Dec 17 16:59:16.241 +(APIServer pid=5) INFO 12-17 13:59:16 [loggers.py:236] Engine 000: Avg prompt throughput: 17.5 tokens/s, Avg generation throughput: 9.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 86.6%, MM cache hit rate: 94.7% +Dec 17 16:59:17.664 +(APIServer pid=5) INFO 12-17 13:59:17 [logger.py:47] Received request chatcmpl-039ec1a665a94dd693bd94739d6dbf95: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, 
spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.4.243:18400 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:59:17 [async_llm.py:344] Added request chatcmpl-039ec1a665a94dd693bd94739d6dbf95. +Dec 17 16:59:17.718 +(APIServer pid=5) INFO 12-17 13:59:17 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: 'You', output_token_ids: [2610], finish_reason: None +Dec 17 16:59:17.765 +(APIServer pid=5) INFO 12-17 13:59:17 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' know', output_token_ids: [1414], finish_reason: None +Dec 17 16:59:17.812 +(APIServer pid=5) INFO 12-17 13:59:17 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:59:17.859 +(APIServer pid=5) INFO 12-17 13:59:17 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' feeling', output_token_ids: [8266], finish_reason: None +Dec 17 16:59:17.906 +(APIServer pid=5) INFO 12-17 13:59:17 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' right', output_token_ids: [1290], finish_reason: None +Dec 17 16:59:17.954 +(APIServer pid=5) INFO 12-17 13:59:17 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' where', output_token_ids: [1380], finish_reason: None +Dec 17 16:59:18.000 +(APIServer pid=5) INFO 12-17 13:59:17 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:59:18.047 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 
(streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:59:18.095 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:59:18.141 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:18.188 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' an', output_token_ids: [458], finish_reason: None +Dec 17 16:59:18.236 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:59:18.283 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:59:18.329 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' have', output_token_ids: [614], finish_reason: None +Dec 17 16:59:18.377 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' this', output_token_ids: [419], finish_reason: None +Dec 17 16:59:18.424 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' amazing', output_token_ids: [7897], finish_reason: None +Dec 17 16:59:18.472 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' detailed', 
output_token_ids: [11682], finish_reason: None +Dec 17 16:59:18.518 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' conversation', output_token_ids: [10435], finish_reason: None +Dec 17 16:59:18.565 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' and', output_token_ids: [323], finish_reason: None +Dec 17 16:59:18.613 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' then', output_token_ids: [1221], finish_reason: None +Dec 17 16:59:18.659 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' the', output_token_ids: [279], finish_reason: None +Dec 17 16:59:18.707 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' next', output_token_ids: [1790], finish_reason: None +Dec 17 16:59:18.754 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' time', output_token_ids: [882], finish_reason: None +Dec 17 16:59:18.800 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:59:18.848 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' chat', output_token_ids: [6236], finish_reason: None +Dec 17 16:59:18.895 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' it', output_token_ids: [432], 
finish_reason: None +Dec 17 16:59:18.942 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' has', output_token_ids: [702], finish_reason: None +Dec 17 16:59:18.989 +(APIServer pid=5) INFO 12-17 13:59:18 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' absolutely', output_token_ids: [10875], finish_reason: None +Dec 17 16:59:19.036 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' no', output_token_ids: [902], finish_reason: None +Dec 17 16:59:19.084 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' idea', output_token_ids: [4522], finish_reason: None +Dec 17 16:59:19.131 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' who', output_token_ids: [879], finish_reason: None +Dec 17 16:59:19.177 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' you', output_token_ids: [498], finish_reason: None +Dec 17 16:59:19.225 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' are', output_token_ids: [525], finish_reason: None +Dec 17 16:59:19.272 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:59:19.320 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' It', output_token_ids: [1084], finish_reason: None +Dec 17 16:59:19.366 
+(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: "'s", output_token_ids: [594], finish_reason: None +Dec 17 16:59:19.413 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' like', output_token_ids: [1075], finish_reason: None +Dec 17 16:59:19.461 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' talking', output_token_ids: [7404], finish_reason: None +Dec 17 16:59:19.508 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:19.554 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' someone', output_token_ids: [4325], finish_reason: None +Dec 17 16:59:19.602 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' with', output_token_ids: [448], finish_reason: None +Dec 17 16:59:19.649 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' am', output_token_ids: [1079], finish_reason: None +Dec 17 16:59:19.696 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: 'nesia', output_token_ids: [97475], finish_reason: None +Dec 17 16:59:19.743 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:59:19.791 +(APIServer pid=5) INFO 12-17 13:59:19 
[logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' So', output_token_ids: [2055], finish_reason: None +Dec 17 16:59:19.838 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' today', output_token_ids: [3351], finish_reason: None +Dec 17 16:59:19.885 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:59:19.933 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:59:19.979 +(APIServer pid=5) INFO 12-17 13:59:19 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:59:20.027 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:20.074 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' dive', output_token_ids: [29863], finish_reason: None +Dec 17 16:59:20.121 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' into', output_token_ids: [1119], finish_reason: None +Dec 17 16:59:20.168 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:59:20.215 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response 
chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' we', output_token_ids: [582], finish_reason: None +Dec 17 16:59:20.263 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' fixed', output_token_ids: [8356], finish_reason: None +Dec 17 16:59:20.310 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' that', output_token_ids: [429], finish_reason: None +Dec 17 16:59:20.356 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:59:20.404 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' We', output_token_ids: [1205], finish_reason: None +Dec 17 16:59:20.451 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: "'re", output_token_ids: [2299], finish_reason: None +Dec 17 16:59:20.498 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' going', output_token_ids: [2087], finish_reason: None +Dec 17 16:59:20.545 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:20.592 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' talk', output_token_ids: [3061], finish_reason: None +Dec 17 16:59:20.640 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming 
delta): output: ' about', output_token_ids: [911], finish_reason: None +Dec 17 16:59:20.686 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' how', output_token_ids: [1246], finish_reason: None +Dec 17 16:59:20.733 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' to', output_token_ids: [311], finish_reason: None +Dec 17 16:59:20.781 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' give', output_token_ids: [2968], finish_reason: None +Dec 17 16:59:20.827 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' our', output_token_ids: [1039], finish_reason: None +Dec 17 16:59:20.875 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' AI', output_token_ids: [15235], finish_reason: None +Dec 17 16:59:20.923 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' agents', output_token_ids: [13009], finish_reason: None +Dec 17 16:59:20.971 +(APIServer pid=5) INFO 12-17 13:59:20 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' a', output_token_ids: [264], finish_reason: None +Dec 17 16:59:21.017 +(APIServer pid=5) INFO 12-17 13:59:21 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' real', output_token_ids: [1931], finish_reason: None +Dec 17 16:59:21.064 +(APIServer pid=5) INFO 12-17 13:59:21 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' working', 
output_token_ids: [3238], finish_reason: None +Dec 17 16:59:21.112 +(APIServer pid=5) INFO 12-17 13:59:21 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: ' memory', output_token_ids: [4938], finish_reason: None +Dec 17 16:59:21.159 +(APIServer pid=5) INFO 12-17 13:59:21 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming delta): output: '.', output_token_ids: [13], finish_reason: None +Dec 17 16:59:21.206 +(APIServer pid=5) INFO 12-17 13:59:21 [logger.py:76] Generated response chatcmpl-039ec1a665a94dd693bd94739d6dbf95 (streaming complete): output: "You know that feeling right where you're talking to an AI you have this amazing detailed conversation and then the next time you chat it has absolutely no idea who you are. It's like talking to someone with amnesia. So today we're going to dive into how we fixed that. We're going to talk about how to give our AI agents a real working memory.", output_token_ids: None, finish_reason: streaming_complete diff --git a/modal-deploy/Sunflower32b-Ultravox/latency_test/time_to_first_token_seconds.ipynb b/modal-deploy/Sunflower32b-Ultravox/latency_test/time_to_first_token_seconds.ipynb new file mode 100644 index 00000000..572e2ad5 --- /dev/null +++ b/modal-deploy/Sunflower32b-Ultravox/latency_test/time_to_first_token_seconds.ipynb @@ -0,0 +1,554 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "id": "05575fcf-f798-4fe8-999d-852876f16657", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import re" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "9e9e3e53-54c9-4d18-a4d3-0aee4f279633", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the content from your file\n", + "def time_to_first_token_seconds(file_path):\n", + " with open(file_path, 'r') as f:\n", + " lines = f.readlines()\n", + " \n", + " timestamp = []\n", + " log = []\n", + " 
\n", + " for line in lines:\n", + " line = line.strip()\n", + " if \"Dec 17\" in line:\n", + " timestamp.append(line)\n", + " else:\n", + " log.append(line)\n", + " \n", + " df = pd.DataFrame({\"timestamp\": timestamp, \"log\": log})\n", + "\n", + " keywords = \"Received request|Generated response\"\n", + " df = df[df['log'].str.contains(keywords, na=False)].reset_index(drop=True)\n", + " conditions = [\n", + " df['log'].str.contains(\"Received request\", na=False),\n", + " df['log'].str.contains(\"Generated response\", na=False)\n", + " ]\n", + " choices = ['received', 'generated']\n", + " df['type'] = np.select(conditions, choices, default=\"others\")\n", + " df = df[df.type.isin(choices)]\n", + "\n", + " df['dt'] = pd.to_datetime(df['timestamp'], format='%b %d %H:%M:%S.%f')\n", + " df['time_to_first_token_seconds'] = (df['dt'].shift(-1) - df['dt']).dt.total_seconds()\n", + " df = df[df['type'] == 'received'][['timestamp', 'type', 'time_to_first_token_seconds', 'log']]\n", + "\n", + " return df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "bc36a9f3-b41a-48a5-90b0-eadc49d2930b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestamptypetime_to_first_token_secondslog
0Dec 17 16:46:47.223received8.470(APIServer pid=5) INFO 12-17 13:46:47 [logger....
7Dec 17 16:46:58.867received0.055(APIServer pid=5) INFO 12-17 13:46:58 [logger....
14Dec 17 16:47:03.881received0.054(APIServer pid=5) INFO 12-17 13:47:03 [logger....
21Dec 17 16:47:06.461received0.054(APIServer pid=5) INFO 12-17 13:47:06 [logger....
28Dec 17 16:47:09.050received0.053(APIServer pid=5) INFO 12-17 13:47:09 [logger....
35Dec 17 16:47:11.549received0.059(APIServer pid=5) INFO 12-17 13:47:11 [logger....
42Dec 17 16:47:14.352received0.054(APIServer pid=5) INFO 12-17 13:47:14 [logger....
49Dec 17 16:47:17.346received0.056(APIServer pid=5) INFO 12-17 13:47:17 [logger....
56Dec 17 16:47:19.883received0.055(APIServer pid=5) INFO 12-17 13:47:19 [logger....
63Dec 17 16:47:22.583received0.055(APIServer pid=5) INFO 12-17 13:47:22 [logger....
70Dec 17 16:47:25.127received0.061(APIServer pid=5) INFO 12-17 13:47:25 [logger....
77Dec 17 16:47:27.579received0.057(APIServer pid=5) INFO 12-17 13:47:27 [logger....
84Dec 17 16:47:32.780received0.055(APIServer pid=5) INFO 12-17 13:47:32 [logger....
91Dec 17 16:47:35.350received0.054(APIServer pid=5) INFO 12-17 13:47:35 [logger....
98Dec 17 16:47:37.760received0.053(APIServer pid=5) INFO 12-17 13:47:37 [logger....
105Dec 17 16:47:40.360received0.055(APIServer pid=5) INFO 12-17 13:47:40 [logger....
112Dec 17 16:47:43.441received0.059(APIServer pid=5) INFO 12-17 13:47:43 [logger....
119Dec 17 16:47:47.344received0.054(APIServer pid=5) INFO 12-17 13:47:47 [logger....
126Dec 17 16:47:51.776received0.056(APIServer pid=5) INFO 12-17 13:47:51 [logger....
133Dec 17 16:47:54.418received0.055(APIServer pid=5) INFO 12-17 13:47:54 [logger....
\n", + "
" + ], + "text/plain": [ + " timestamp type time_to_first_token_seconds \\\n", + "0 Dec 17 16:46:47.223 received 8.470 \n", + "7 Dec 17 16:46:58.867 received 0.055 \n", + "14 Dec 17 16:47:03.881 received 0.054 \n", + "21 Dec 17 16:47:06.461 received 0.054 \n", + "28 Dec 17 16:47:09.050 received 0.053 \n", + "35 Dec 17 16:47:11.549 received 0.059 \n", + "42 Dec 17 16:47:14.352 received 0.054 \n", + "49 Dec 17 16:47:17.346 received 0.056 \n", + "56 Dec 17 16:47:19.883 received 0.055 \n", + "63 Dec 17 16:47:22.583 received 0.055 \n", + "70 Dec 17 16:47:25.127 received 0.061 \n", + "77 Dec 17 16:47:27.579 received 0.057 \n", + "84 Dec 17 16:47:32.780 received 0.055 \n", + "91 Dec 17 16:47:35.350 received 0.054 \n", + "98 Dec 17 16:47:37.760 received 0.053 \n", + "105 Dec 17 16:47:40.360 received 0.055 \n", + "112 Dec 17 16:47:43.441 received 0.059 \n", + "119 Dec 17 16:47:47.344 received 0.054 \n", + "126 Dec 17 16:47:51.776 received 0.056 \n", + "133 Dec 17 16:47:54.418 received 0.055 \n", + "\n", + " log \n", + "0 (APIServer pid=5) INFO 12-17 13:46:47 [logger.... \n", + "7 (APIServer pid=5) INFO 12-17 13:46:58 [logger.... \n", + "14 (APIServer pid=5) INFO 12-17 13:47:03 [logger.... \n", + "21 (APIServer pid=5) INFO 12-17 13:47:06 [logger.... \n", + "28 (APIServer pid=5) INFO 12-17 13:47:09 [logger.... \n", + "35 (APIServer pid=5) INFO 12-17 13:47:11 [logger.... \n", + "42 (APIServer pid=5) INFO 12-17 13:47:14 [logger.... \n", + "49 (APIServer pid=5) INFO 12-17 13:47:17 [logger.... \n", + "56 (APIServer pid=5) INFO 12-17 13:47:19 [logger.... \n", + "63 (APIServer pid=5) INFO 12-17 13:47:22 [logger.... \n", + "70 (APIServer pid=5) INFO 12-17 13:47:25 [logger.... \n", + "77 (APIServer pid=5) INFO 12-17 13:47:27 [logger.... \n", + "84 (APIServer pid=5) INFO 12-17 13:47:32 [logger.... \n", + "91 (APIServer pid=5) INFO 12-17 13:47:35 [logger.... \n", + "98 (APIServer pid=5) INFO 12-17 13:47:37 [logger.... \n", + "105 (APIServer pid=5) INFO 12-17 13:47:40 [logger.... 
\n", + "112 (APIServer pid=5) INFO 12-17 13:47:43 [logger.... \n", + "119 (APIServer pid=5) INFO 12-17 13:47:47 [logger.... \n", + "126 (APIServer pid=5) INFO 12-17 13:47:51 [logger.... \n", + "133 (APIServer pid=5) INFO 12-17 13:47:54 [logger.... " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "time_to_first_token_seconds('what_is_sunflower.txt')" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "3929f866-5d8e-4ff6-9e76-eb1e9a0018b2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestamptypetime_to_first_token_secondslog
0Dec 17 16:54:26.871received8.205(APIServer pid=5) INFO 12-17 13:54:26 [logger....
77Dec 17 16:55:31.381received0.053(APIServer pid=5) INFO 12-17 13:55:31 [logger....
153Dec 17 16:55:39.345received0.054(APIServer pid=5) INFO 12-17 13:55:39 [logger....
230Dec 17 16:56:13.369received0.054(APIServer pid=5) INFO 12-17 13:56:13 [logger....
307Dec 17 16:56:20.816received0.054(APIServer pid=5) INFO 12-17 13:56:20 [logger....
384Dec 17 16:56:28.076received0.054(APIServer pid=5) INFO 12-17 13:56:28 [logger....
460Dec 17 16:57:06.922received0.054(APIServer pid=5) INFO 12-17 13:57:06 [logger....
538Dec 17 16:57:14.621received0.054(APIServer pid=5) INFO 12-17 13:57:14 [logger....
615Dec 17 16:57:21.380received0.053(APIServer pid=5) INFO 12-17 13:57:21 [logger....
692Dec 17 16:57:28.986received0.053(APIServer pid=5) INFO 12-17 13:57:28 [logger....
769Dec 17 16:57:36.108received0.054(APIServer pid=5) INFO 12-17 13:57:36 [logger....
845Dec 17 16:57:43.565received0.053(APIServer pid=5) INFO 12-17 13:57:43 [logger....
921Dec 17 16:58:24.471received0.054(APIServer pid=5) INFO 12-17 13:58:24 [logger....
998Dec 17 16:58:34.169received0.054(APIServer pid=5) INFO 12-17 13:58:34 [logger....
1073Dec 17 16:58:41.496received0.052(APIServer pid=5) INFO 12-17 13:58:41 [logger....
1150Dec 17 16:58:49.033received0.053(APIServer pid=5) INFO 12-17 13:58:49 [logger....
1226Dec 17 16:58:56.196received0.054(APIServer pid=5) INFO 12-17 13:58:56 [logger....
1303Dec 17 16:59:03.363received0.054(APIServer pid=5) INFO 12-17 13:59:03 [logger....
1379Dec 17 16:59:10.335received0.052(APIServer pid=5) INFO 12-17 13:59:10 [logger....
1456Dec 17 16:59:17.664received0.054(APIServer pid=5) INFO 12-17 13:59:17 [logger....
\n", + "
" + ], + "text/plain": [ + " timestamp type time_to_first_token_seconds \\\n", + "0 Dec 17 16:54:26.871 received 8.205 \n", + "77 Dec 17 16:55:31.381 received 0.053 \n", + "153 Dec 17 16:55:39.345 received 0.054 \n", + "230 Dec 17 16:56:13.369 received 0.054 \n", + "307 Dec 17 16:56:20.816 received 0.054 \n", + "384 Dec 17 16:56:28.076 received 0.054 \n", + "460 Dec 17 16:57:06.922 received 0.054 \n", + "538 Dec 17 16:57:14.621 received 0.054 \n", + "615 Dec 17 16:57:21.380 received 0.053 \n", + "692 Dec 17 16:57:28.986 received 0.053 \n", + "769 Dec 17 16:57:36.108 received 0.054 \n", + "845 Dec 17 16:57:43.565 received 0.053 \n", + "921 Dec 17 16:58:24.471 received 0.054 \n", + "998 Dec 17 16:58:34.169 received 0.054 \n", + "1073 Dec 17 16:58:41.496 received 0.052 \n", + "1150 Dec 17 16:58:49.033 received 0.053 \n", + "1226 Dec 17 16:58:56.196 received 0.054 \n", + "1303 Dec 17 16:59:03.363 received 0.054 \n", + "1379 Dec 17 16:59:10.335 received 0.052 \n", + "1456 Dec 17 16:59:17.664 received 0.054 \n", + "\n", + " log \n", + "0 (APIServer pid=5) INFO 12-17 13:54:26 [logger.... \n", + "77 (APIServer pid=5) INFO 12-17 13:55:31 [logger.... \n", + "153 (APIServer pid=5) INFO 12-17 13:55:39 [logger.... \n", + "230 (APIServer pid=5) INFO 12-17 13:56:13 [logger.... \n", + "307 (APIServer pid=5) INFO 12-17 13:56:20 [logger.... \n", + "384 (APIServer pid=5) INFO 12-17 13:56:28 [logger.... \n", + "460 (APIServer pid=5) INFO 12-17 13:57:06 [logger.... \n", + "538 (APIServer pid=5) INFO 12-17 13:57:14 [logger.... \n", + "615 (APIServer pid=5) INFO 12-17 13:57:21 [logger.... \n", + "692 (APIServer pid=5) INFO 12-17 13:57:28 [logger.... \n", + "769 (APIServer pid=5) INFO 12-17 13:57:36 [logger.... \n", + "845 (APIServer pid=5) INFO 12-17 13:57:43 [logger.... \n", + "921 (APIServer pid=5) INFO 12-17 13:58:24 [logger.... \n", + "998 (APIServer pid=5) INFO 12-17 13:58:34 [logger.... \n", + "1073 (APIServer pid=5) INFO 12-17 13:58:41 [logger.... 
\n", + "1150 (APIServer pid=5) INFO 12-17 13:58:49 [logger.... \n", + "1226 (APIServer pid=5) INFO 12-17 13:58:56 [logger.... \n", + "1303 (APIServer pid=5) INFO 12-17 13:59:03 [logger.... \n", + "1379 (APIServer pid=5) INFO 12-17 13:59:10 [logger.... \n", + "1456 (APIServer pid=5) INFO 12-17 13:59:17 [logger.... " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "time_to_first_token_seconds('context_eng_1.txt')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a174a6e-3275-4a1e-8f51-b5a3bdf36bba", + "metadata": {}, + "outputs": [], + "source": [ + "df." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/modal-deploy/Sunflower32b-Ultravox/latency_test/what_is_sunflower.txt b/modal-deploy/Sunflower32b-Ultravox/latency_test/what_is_sunflower.txt new file mode 100644 index 00000000..c8c76a70 --- /dev/null +++ b/modal-deploy/Sunflower32b-Ultravox/latency_test/what_is_sunflower.txt @@ -0,0 +1,336 @@ +Dec 17 16:46:47.223 +(APIServer pid=5) INFO 12-17 13:46:47 [logger.py:47] Received request chatcmpl-b58d6656a95b4d419d91ea91a4c3e6c3: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: 
None. +Dec 17 16:46:55.597 +(APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:46:55 [async_llm.py:344] Added request chatcmpl-b58d6656a95b4d419d91ea91a4c3e6c3. +Dec 17 16:46:55.693 +(APIServer pid=5) INFO 12-17 13:46:55 [logger.py:76] Generated response chatcmpl-b58d6656a95b4d419d91ea91a4c3e6c3 (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:46:55.740 +(APIServer pid=5) INFO 12-17 13:46:55 [logger.py:76] Generated response chatcmpl-b58d6656a95b4d419d91ea91a4c3e6c3 (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:46:55.786 +(APIServer pid=5) INFO 12-17 13:46:55 [logger.py:76] Generated response chatcmpl-b58d6656a95b4d419d91ea91a4c3e6c3 (streaming delta): output: ' sun', output_token_ids: [7015], finish_reason: None +Dec 17 16:46:55.830 +(APIServer pid=5) INFO 12-17 13:46:55 [logger.py:76] Generated response chatcmpl-b58d6656a95b4d419d91ea91a4c3e6c3 (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:46:55.877 +(APIServer pid=5) INFO 12-17 13:46:55 [logger.py:76] Generated response chatcmpl-b58d6656a95b4d419d91ea91a4c3e6c3 (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:46:55.923 +(APIServer pid=5) INFO 12-17 13:46:55 [logger.py:76] Generated response chatcmpl-b58d6656a95b4d419d91ea91a4c3e6c3 (streaming complete): output: 'What is sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:46:56.001 +POST /v1/chat/completions -> 200 OK (duration: 35.5 s, execution: 35.4 s) +Dec 17 16:46:58.867 +(APIServer pid=5) INFO 12-17 13:46:58 [logger.py:47] Received request chatcmpl-196a01be059647b19691dd56aab6b2f2: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], 
include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:46:58 [async_llm.py:344] Added request chatcmpl-196a01be059647b19691dd56aab6b2f2. +Dec 17 16:46:58.922 +(APIServer pid=5) INFO 12-17 13:46:58 [logger.py:76] Generated response chatcmpl-196a01be059647b19691dd56aab6b2f2 (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:46:58.968 +(APIServer pid=5) INFO 12-17 13:46:58 [logger.py:76] Generated response chatcmpl-196a01be059647b19691dd56aab6b2f2 (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:46:59.015 +(APIServer pid=5) INFO 12-17 13:46:59 [logger.py:76] Generated response chatcmpl-196a01be059647b19691dd56aab6b2f2 (streaming delta): output: ' sun', output_token_ids: [7015], finish_reason: None +Dec 17 16:46:59.065 +(APIServer pid=5) INFO 12-17 13:46:59 [logger.py:76] Generated response chatcmpl-196a01be059647b19691dd56aab6b2f2 (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:46:59.111 +(APIServer pid=5) INFO 12-17 13:46:59 [logger.py:76] Generated response chatcmpl-196a01be059647b19691dd56aab6b2f2 (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:46:59.158 +(APIServer pid=5) INFO 12-17 13:46:59 [logger.py:76] Generated response chatcmpl-196a01be059647b19691dd56aab6b2f2 (streaming complete): output: 'What is sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:46:59.233 +POST /v1/chat/completions -> 200 OK (duration: 2.02 s, execution: 1.92 s) +Dec 17 16:47:03.881 +(APIServer pid=5) INFO 12-17 13:47:03 [logger.py:47] Received request 
chatcmpl-511952e787824ce18e70048e4e37908a: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:03 [async_llm.py:344] Added request chatcmpl-511952e787824ce18e70048e4e37908a. +Dec 17 16:47:03.935 +(APIServer pid=5) INFO 12-17 13:47:03 [logger.py:76] Generated response chatcmpl-511952e787824ce18e70048e4e37908a (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:03.985 +(APIServer pid=5) INFO 12-17 13:47:03 [logger.py:76] Generated response chatcmpl-511952e787824ce18e70048e4e37908a (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:04.032 +(APIServer pid=5) INFO 12-17 13:47:04 [logger.py:76] Generated response chatcmpl-511952e787824ce18e70048e4e37908a (streaming delta): output: ' Sun', output_token_ids: [8059], finish_reason: None +Dec 17 16:47:04.077 +(APIServer pid=5) INFO 12-17 13:47:04 [logger.py:76] Generated response chatcmpl-511952e787824ce18e70048e4e37908a (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:04.125 +(APIServer pid=5) INFO 12-17 13:47:04 [logger.py:76] Generated response chatcmpl-511952e787824ce18e70048e4e37908a (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:04.171 +(APIServer pid=5) INFO 12-17 13:47:04 [logger.py:76] Generated response chatcmpl-511952e787824ce18e70048e4e37908a (streaming complete): output: 'What is Sunflower?', 
output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:04.294 +POST /v1/chat/completions -> 200 OK (duration: 4.16 s, execution: 4.01 s) +Dec 17 16:47:05.591 +(APIServer pid=5) INFO 12-17 13:47:05 [loggers.py:236] Engine 000: Avg prompt throughput: 21.0 tokens/s, Avg generation throughput: 1.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 61.0%, MM cache hit rate: 66.7% +Dec 17 16:47:06.461 +(APIServer pid=5) INFO 12-17 13:47:06 [logger.py:47] Received request chatcmpl-96a1b8b2685c4c818f189e6e1ddf641d: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:06 [async_llm.py:344] Added request chatcmpl-96a1b8b2685c4c818f189e6e1ddf641d. 
+Dec 17 16:47:06.515 +(APIServer pid=5) INFO 12-17 13:47:06 [logger.py:76] Generated response chatcmpl-96a1b8b2685c4c818f189e6e1ddf641d (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:06.561 +(APIServer pid=5) INFO 12-17 13:47:06 [logger.py:76] Generated response chatcmpl-96a1b8b2685c4c818f189e6e1ddf641d (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:06.608 +(APIServer pid=5) INFO 12-17 13:47:06 [logger.py:76] Generated response chatcmpl-96a1b8b2685c4c818f189e6e1ddf641d (streaming delta): output: ' Sun', output_token_ids: [8059], finish_reason: None +Dec 17 16:47:06.655 +(APIServer pid=5) INFO 12-17 13:47:06 [logger.py:76] Generated response chatcmpl-96a1b8b2685c4c818f189e6e1ddf641d (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:06.704 +(APIServer pid=5) INFO 12-17 13:47:06 [logger.py:76] Generated response chatcmpl-96a1b8b2685c4c818f189e6e1ddf641d (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:06.747 +(APIServer pid=5) INFO 12-17 13:47:06 [logger.py:76] Generated response chatcmpl-96a1b8b2685c4c818f189e6e1ddf641d (streaming complete): output: 'What is Sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:06.834 +POST /v1/chat/completions -> 200 OK (duration: 1.80 s, execution: 1.71 s) +Dec 17 16:47:09.050 +(APIServer pid=5) INFO 12-17 13:47:09 [logger.py:47] Received request chatcmpl-590cb140bac241afa7498eda11c41e97: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, 
extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:09 [async_llm.py:344] Added request chatcmpl-590cb140bac241afa7498eda11c41e97. +Dec 17 16:47:09.103 +(APIServer pid=5) INFO 12-17 13:47:09 [logger.py:76] Generated response chatcmpl-590cb140bac241afa7498eda11c41e97 (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:09.150 +(APIServer pid=5) INFO 12-17 13:47:09 [logger.py:76] Generated response chatcmpl-590cb140bac241afa7498eda11c41e97 (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:09.196 +(APIServer pid=5) INFO 12-17 13:47:09 [logger.py:76] Generated response chatcmpl-590cb140bac241afa7498eda11c41e97 (streaming delta): output: ' Sun', output_token_ids: [8059], finish_reason: None +Dec 17 16:47:09.242 +(APIServer pid=5) INFO 12-17 13:47:09 [logger.py:76] Generated response chatcmpl-590cb140bac241afa7498eda11c41e97 (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:09.290 +(APIServer pid=5) INFO 12-17 13:47:09 [logger.py:76] Generated response chatcmpl-590cb140bac241afa7498eda11c41e97 (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:09.335 +(APIServer pid=5) INFO 12-17 13:47:09 [logger.py:76] Generated response chatcmpl-590cb140bac241afa7498eda11c41e97 (streaming complete): output: 'What is Sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:09.393 +POST /v1/chat/completions -> 200 OK (duration: 1.77 s, execution: 1.64 s) +Dec 17 16:47:11.549 +(APIServer pid=5) INFO 12-17 13:47:11 [logger.py:47] Received request chatcmpl-af0e746ee35c439ea41d3236d8e90677: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], 
include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:11 [async_llm.py:344] Added request chatcmpl-af0e746ee35c439ea41d3236d8e90677. +Dec 17 16:47:11.608 +(APIServer pid=5) INFO 12-17 13:47:11 [logger.py:76] Generated response chatcmpl-af0e746ee35c439ea41d3236d8e90677 (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:11.655 +(APIServer pid=5) INFO 12-17 13:47:11 [logger.py:76] Generated response chatcmpl-af0e746ee35c439ea41d3236d8e90677 (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:11.701 +(APIServer pid=5) INFO 12-17 13:47:11 [logger.py:76] Generated response chatcmpl-af0e746ee35c439ea41d3236d8e90677 (streaming delta): output: ' sun', output_token_ids: [7015], finish_reason: None +Dec 17 16:47:11.746 +(APIServer pid=5) INFO 12-17 13:47:11 [logger.py:76] Generated response chatcmpl-af0e746ee35c439ea41d3236d8e90677 (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:11.793 +(APIServer pid=5) INFO 12-17 13:47:11 [logger.py:76] Generated response chatcmpl-af0e746ee35c439ea41d3236d8e90677 (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:11.837 +(APIServer pid=5) INFO 12-17 13:47:11 [logger.py:76] Generated response chatcmpl-af0e746ee35c439ea41d3236d8e90677 (streaming complete): output: 'What is sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:11.912 +POST /v1/chat/completions -> 200 OK (duration: 1.69 s, execution: 1.63 s) +Dec 17 16:47:14.352 +(APIServer pid=5) INFO 12-17 13:47:14 [logger.py:47] Received request 
chatcmpl-58ce52fbff8a4616a63fbedefa2a3f8d: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:14 [async_llm.py:344] Added request chatcmpl-58ce52fbff8a4616a63fbedefa2a3f8d. +Dec 17 16:47:14.406 +(APIServer pid=5) INFO 12-17 13:47:14 [logger.py:76] Generated response chatcmpl-58ce52fbff8a4616a63fbedefa2a3f8d (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:14.453 +(APIServer pid=5) INFO 12-17 13:47:14 [logger.py:76] Generated response chatcmpl-58ce52fbff8a4616a63fbedefa2a3f8d (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:14.499 +(APIServer pid=5) INFO 12-17 13:47:14 [logger.py:76] Generated response chatcmpl-58ce52fbff8a4616a63fbedefa2a3f8d (streaming delta): output: ' sun', output_token_ids: [7015], finish_reason: None +Dec 17 16:47:14.545 +(APIServer pid=5) INFO 12-17 13:47:14 [logger.py:76] Generated response chatcmpl-58ce52fbff8a4616a63fbedefa2a3f8d (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:14.589 +(APIServer pid=5) INFO 12-17 13:47:14 [logger.py:76] Generated response chatcmpl-58ce52fbff8a4616a63fbedefa2a3f8d (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:14.634 +(APIServer pid=5) INFO 12-17 13:47:14 [logger.py:76] Generated response chatcmpl-58ce52fbff8a4616a63fbedefa2a3f8d (streaming complete): output: 'What is sunflower?', 
output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:14.695 +POST /v1/chat/completions -> 200 OK (duration: 1.92 s, execution: 1.81 s) +Dec 17 16:47:15.591 +(APIServer pid=5) INFO 12-17 13:47:15 [loggers.py:236] Engine 000: Avg prompt throughput: 28.0 tokens/s, Avg generation throughput: 2.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 78.4%, MM cache hit rate: 85.7% +Dec 17 16:47:17.346 +(APIServer pid=5) INFO 12-17 13:47:17 [logger.py:47] Received request chatcmpl-1bfc32ec4d0c45d58e299fa6bd22a919: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:17 [async_llm.py:344] Added request chatcmpl-1bfc32ec4d0c45d58e299fa6bd22a919. 
+Dec 17 16:47:17.402 +(APIServer pid=5) INFO 12-17 13:47:17 [logger.py:76] Generated response chatcmpl-1bfc32ec4d0c45d58e299fa6bd22a919 (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:17.446 +(APIServer pid=5) INFO 12-17 13:47:17 [logger.py:76] Generated response chatcmpl-1bfc32ec4d0c45d58e299fa6bd22a919 (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:17.494 +(APIServer pid=5) INFO 12-17 13:47:17 [logger.py:76] Generated response chatcmpl-1bfc32ec4d0c45d58e299fa6bd22a919 (streaming delta): output: ' Sun', output_token_ids: [8059], finish_reason: None +Dec 17 16:47:17.545 +(APIServer pid=5) INFO 12-17 13:47:17 [logger.py:76] Generated response chatcmpl-1bfc32ec4d0c45d58e299fa6bd22a919 (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:17.590 +(APIServer pid=5) INFO 12-17 13:47:17 [logger.py:76] Generated response chatcmpl-1bfc32ec4d0c45d58e299fa6bd22a919 (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:17.636 +(APIServer pid=5) INFO 12-17 13:47:17 [logger.py:76] Generated response chatcmpl-1bfc32ec4d0c45d58e299fa6bd22a919 (streaming complete): output: 'What is Sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:17.761 +POST /v1/chat/completions -> 200 OK (duration: 2.19 s, execution: 2.03 s) +Dec 17 16:47:19.883 +(APIServer pid=5) INFO 12-17 13:47:19 [logger.py:47] Received request chatcmpl-0757c342f7424778a7e6748697abd02a: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, 
extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:19 [async_llm.py:344] Added request chatcmpl-0757c342f7424778a7e6748697abd02a. +Dec 17 16:47:19.938 +(APIServer pid=5) INFO 12-17 13:47:19 [logger.py:76] Generated response chatcmpl-0757c342f7424778a7e6748697abd02a (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:19.984 +(APIServer pid=5) INFO 12-17 13:47:19 [logger.py:76] Generated response chatcmpl-0757c342f7424778a7e6748697abd02a (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:20.033 +(APIServer pid=5) INFO 12-17 13:47:20 [logger.py:76] Generated response chatcmpl-0757c342f7424778a7e6748697abd02a (streaming delta): output: ' sun', output_token_ids: [7015], finish_reason: None +Dec 17 16:47:20.076 +(APIServer pid=5) INFO 12-17 13:47:20 [logger.py:76] Generated response chatcmpl-0757c342f7424778a7e6748697abd02a (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:20.122 +(APIServer pid=5) INFO 12-17 13:47:20 [logger.py:76] Generated response chatcmpl-0757c342f7424778a7e6748697abd02a (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:20.167 +(APIServer pid=5) INFO 12-17 13:47:20 [logger.py:76] Generated response chatcmpl-0757c342f7424778a7e6748697abd02a (streaming complete): output: 'What is sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:20.265 +POST /v1/chat/completions -> 200 OK (duration: 1.72 s, execution: 1.63 s) +Dec 17 16:47:22.583 +(APIServer pid=5) INFO 12-17 13:47:22 [logger.py:47] Received request chatcmpl-5041914cbd5846cbbefb36afbb280df8: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], 
include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:22 [async_llm.py:344] Added request chatcmpl-5041914cbd5846cbbefb36afbb280df8. +Dec 17 16:47:22.638 +(APIServer pid=5) INFO 12-17 13:47:22 [logger.py:76] Generated response chatcmpl-5041914cbd5846cbbefb36afbb280df8 (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:22.684 +(APIServer pid=5) INFO 12-17 13:47:22 [logger.py:76] Generated response chatcmpl-5041914cbd5846cbbefb36afbb280df8 (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:22.731 +(APIServer pid=5) INFO 12-17 13:47:22 [logger.py:76] Generated response chatcmpl-5041914cbd5846cbbefb36afbb280df8 (streaming delta): output: ' sun', output_token_ids: [7015], finish_reason: None +Dec 17 16:47:22.778 +(APIServer pid=5) INFO 12-17 13:47:22 [logger.py:76] Generated response chatcmpl-5041914cbd5846cbbefb36afbb280df8 (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:22.822 +(APIServer pid=5) INFO 12-17 13:47:22 [logger.py:76] Generated response chatcmpl-5041914cbd5846cbbefb36afbb280df8 (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:22.869 +(APIServer pid=5) INFO 12-17 13:47:22 [logger.py:76] Generated response chatcmpl-5041914cbd5846cbbefb36afbb280df8 (streaming complete): output: 'What is sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:22.932 +POST /v1/chat/completions -> 200 OK (duration: 1.83 s, execution: 1.74 s) +Dec 17 16:47:25.127 +(APIServer pid=5) INFO 12-17 13:47:25 [logger.py:47] Received request 
chatcmpl-d09bbe7848974daeaf3b7460cf0c0fec: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:25 [async_llm.py:344] Added request chatcmpl-d09bbe7848974daeaf3b7460cf0c0fec. +Dec 17 16:47:25.188 +(APIServer pid=5) INFO 12-17 13:47:25 [logger.py:76] Generated response chatcmpl-d09bbe7848974daeaf3b7460cf0c0fec (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:25.234 +(APIServer pid=5) INFO 12-17 13:47:25 [logger.py:76] Generated response chatcmpl-d09bbe7848974daeaf3b7460cf0c0fec (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:25.280 +(APIServer pid=5) INFO 12-17 13:47:25 [logger.py:76] Generated response chatcmpl-d09bbe7848974daeaf3b7460cf0c0fec (streaming delta): output: ' sun', output_token_ids: [7015], finish_reason: None +Dec 17 16:47:25.328 +(APIServer pid=5) INFO 12-17 13:47:25 [logger.py:76] Generated response chatcmpl-d09bbe7848974daeaf3b7460cf0c0fec (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:25.374 +(APIServer pid=5) INFO 12-17 13:47:25 [logger.py:76] Generated response chatcmpl-d09bbe7848974daeaf3b7460cf0c0fec (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:25.422 +(APIServer pid=5) INFO 12-17 13:47:25 [logger.py:76] Generated response chatcmpl-d09bbe7848974daeaf3b7460cf0c0fec (streaming complete): output: 'What is sunflower?', 
output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:25.496 +POST /v1/chat/completions -> 200 OK (duration: 1.73 s, execution: 1.67 s) +Dec 17 16:47:25.593 +(APIServer pid=5) INFO 12-17 13:47:25 [loggers.py:236] Engine 000: Avg prompt throughput: 28.0 tokens/s, Avg generation throughput: 2.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 83.1%, MM cache hit rate: 90.9% +Dec 17 16:47:27.579 +(APIServer pid=5) INFO 12-17 13:47:27 [logger.py:47] Received request chatcmpl-d6850bb5f3bb4bd08c88cd99b19c1a21: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:27 [async_llm.py:344] Added request chatcmpl-d6850bb5f3bb4bd08c88cd99b19c1a21. 
+Dec 17 16:47:27.636 +(APIServer pid=5) INFO 12-17 13:47:27 [logger.py:76] Generated response chatcmpl-d6850bb5f3bb4bd08c88cd99b19c1a21 (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:27.688 +(APIServer pid=5) INFO 12-17 13:47:27 [logger.py:76] Generated response chatcmpl-d6850bb5f3bb4bd08c88cd99b19c1a21 (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:27.739 +(APIServer pid=5) INFO 12-17 13:47:27 [logger.py:76] Generated response chatcmpl-d6850bb5f3bb4bd08c88cd99b19c1a21 (streaming delta): output: ' Sun', output_token_ids: [8059], finish_reason: None +Dec 17 16:47:27.790 +(APIServer pid=5) INFO 12-17 13:47:27 [logger.py:76] Generated response chatcmpl-d6850bb5f3bb4bd08c88cd99b19c1a21 (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:27.835 +(APIServer pid=5) INFO 12-17 13:47:27 [logger.py:76] Generated response chatcmpl-d6850bb5f3bb4bd08c88cd99b19c1a21 (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:27.887 +(APIServer pid=5) INFO 12-17 13:47:27 [logger.py:76] Generated response chatcmpl-d6850bb5f3bb4bd08c88cd99b19c1a21 (streaming complete): output: 'What is Sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:28.032 +POST /v1/chat/completions -> 200 OK (duration: 1.69 s, execution: 1.62 s) +Dec 17 16:47:32.780 +(APIServer pid=5) INFO 12-17 13:47:32 [logger.py:47] Received request chatcmpl-b4bd1fca39344920a85307bc7d62a86d: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, 
extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:32 [async_llm.py:344] Added request chatcmpl-b4bd1fca39344920a85307bc7d62a86d. +Dec 17 16:47:32.835 +(APIServer pid=5) INFO 12-17 13:47:32 [logger.py:76] Generated response chatcmpl-b4bd1fca39344920a85307bc7d62a86d (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:32.881 +(APIServer pid=5) INFO 12-17 13:47:32 [logger.py:76] Generated response chatcmpl-b4bd1fca39344920a85307bc7d62a86d (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:32.926 +(APIServer pid=5) INFO 12-17 13:47:32 [logger.py:76] Generated response chatcmpl-b4bd1fca39344920a85307bc7d62a86d (streaming delta): output: ' sun', output_token_ids: [7015], finish_reason: None +Dec 17 16:47:32.974 +(APIServer pid=5) INFO 12-17 13:47:32 [logger.py:76] Generated response chatcmpl-b4bd1fca39344920a85307bc7d62a86d (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:33.020 +(APIServer pid=5) INFO 12-17 13:47:33 [logger.py:76] Generated response chatcmpl-b4bd1fca39344920a85307bc7d62a86d (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:33.117 +POST /v1/chat/completions -> 200 OK (duration: 4.37 s, execution: 4.31 s) +Dec 17 16:47:33.066 +(APIServer pid=5) INFO 12-17 13:47:33 [logger.py:76] Generated response chatcmpl-b4bd1fca39344920a85307bc7d62a86d (streaming complete): output: 'What is sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:35.350 +(APIServer pid=5) INFO 12-17 13:47:35 [logger.py:47] Received request chatcmpl-b0a63e31ab284b5194b3c8d643165abb: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], 
include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:35 [async_llm.py:344] Added request chatcmpl-b0a63e31ab284b5194b3c8d643165abb. +Dec 17 16:47:35.404 +(APIServer pid=5) INFO 12-17 13:47:35 [logger.py:76] Generated response chatcmpl-b0a63e31ab284b5194b3c8d643165abb (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:35.456 +(APIServer pid=5) INFO 12-17 13:47:35 [logger.py:76] Generated response chatcmpl-b0a63e31ab284b5194b3c8d643165abb (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:35.501 +(APIServer pid=5) INFO 12-17 13:47:35 [logger.py:76] Generated response chatcmpl-b0a63e31ab284b5194b3c8d643165abb (streaming delta): output: ' sun', output_token_ids: [7015], finish_reason: None +Dec 17 16:47:35.554 +(APIServer pid=5) INFO 12-17 13:47:35 [logger.py:76] Generated response chatcmpl-b0a63e31ab284b5194b3c8d643165abb (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:35.594 +(APIServer pid=5) INFO 12-17 13:47:35 [loggers.py:236] Engine 000: Avg prompt throughput: 21.0 tokens/s, Avg generation throughput: 1.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.3%, Prefix cache hit rate: 84.9%, MM cache hit rate: 92.9% +Dec 17 16:47:35.601 +(APIServer pid=5) INFO 12-17 13:47:35 [logger.py:76] Generated response chatcmpl-b0a63e31ab284b5194b3c8d643165abb (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:35.646 +(APIServer pid=5) INFO 12-17 13:47:35 [logger.py:76] Generated response chatcmpl-b0a63e31ab284b5194b3c8d643165abb (streaming complete): 
output: 'What is sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:35.730 +POST /v1/chat/completions -> 200 OK (duration: 1.76 s, execution: 1.70 s) +Dec 17 16:47:37.760 +(APIServer pid=5) INFO 12-17 13:47:37 [logger.py:47] Received request chatcmpl-bcfced085e444e80add34db10f7ed36e: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:37 [async_llm.py:344] Added request chatcmpl-bcfced085e444e80add34db10f7ed36e. +Dec 17 16:47:37.813 +(APIServer pid=5) INFO 12-17 13:47:37 [logger.py:76] Generated response chatcmpl-bcfced085e444e80add34db10f7ed36e (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:37.861 +(APIServer pid=5) INFO 12-17 13:47:37 [logger.py:76] Generated response chatcmpl-bcfced085e444e80add34db10f7ed36e (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:37.933 +(APIServer pid=5) INFO 12-17 13:47:37 [logger.py:76] Generated response chatcmpl-bcfced085e444e80add34db10f7ed36e (streaming delta): output: ' sun', output_token_ids: [7015], finish_reason: None +Dec 17 16:47:37.980 +(APIServer pid=5) INFO 12-17 13:47:37 [logger.py:76] Generated response chatcmpl-bcfced085e444e80add34db10f7ed36e (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:38.031 +(APIServer pid=5) INFO 12-17 13:47:38 [logger.py:76] Generated response chatcmpl-bcfced085e444e80add34db10f7ed36e 
(streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:38.077 +(APIServer pid=5) INFO 12-17 13:47:38 [logger.py:76] Generated response chatcmpl-bcfced085e444e80add34db10f7ed36e (streaming complete): output: 'What is sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:38.179 +POST /v1/chat/completions -> 200 OK (duration: 1.62 s, execution: 1.51 s) +Dec 17 16:47:40.360 +(APIServer pid=5) INFO 12-17 13:47:40 [logger.py:47] Received request chatcmpl-2c8ab9f1a66f4878aa34a0123362384d: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:40 [async_llm.py:344] Added request chatcmpl-2c8ab9f1a66f4878aa34a0123362384d. 
+Dec 17 16:47:40.415 +(APIServer pid=5) INFO 12-17 13:47:40 [logger.py:76] Generated response chatcmpl-2c8ab9f1a66f4878aa34a0123362384d (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:40.474 +(APIServer pid=5) INFO 12-17 13:47:40 [logger.py:76] Generated response chatcmpl-2c8ab9f1a66f4878aa34a0123362384d (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:40.523 +(APIServer pid=5) INFO 12-17 13:47:40 [logger.py:76] Generated response chatcmpl-2c8ab9f1a66f4878aa34a0123362384d (streaming delta): output: ' Sun', output_token_ids: [8059], finish_reason: None +Dec 17 16:47:40.568 +(APIServer pid=5) INFO 12-17 13:47:40 [logger.py:76] Generated response chatcmpl-2c8ab9f1a66f4878aa34a0123362384d (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:40.617 +(APIServer pid=5) INFO 12-17 13:47:40 [logger.py:76] Generated response chatcmpl-2c8ab9f1a66f4878aa34a0123362384d (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:40.662 +(APIServer pid=5) INFO 12-17 13:47:40 [logger.py:76] Generated response chatcmpl-2c8ab9f1a66f4878aa34a0123362384d (streaming complete): output: 'What is Sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:40.751 +POST /v1/chat/completions -> 200 OK (duration: 1.75 s, execution: 1.68 s) +Dec 17 16:47:43.441 +(APIServer pid=5) INFO 12-17 13:47:43 [logger.py:47] Received request chatcmpl-71675fb32535413e9a5d721637ecf6ce: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, 
extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:43 [async_llm.py:344] Added request chatcmpl-71675fb32535413e9a5d721637ecf6ce. +Dec 17 16:47:43.500 +(APIServer pid=5) INFO 12-17 13:47:43 [logger.py:76] Generated response chatcmpl-71675fb32535413e9a5d721637ecf6ce (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:43.550 +(APIServer pid=5) INFO 12-17 13:47:43 [logger.py:76] Generated response chatcmpl-71675fb32535413e9a5d721637ecf6ce (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:43.596 +(APIServer pid=5) INFO 12-17 13:47:43 [logger.py:76] Generated response chatcmpl-71675fb32535413e9a5d721637ecf6ce (streaming delta): output: ' sun', output_token_ids: [7015], finish_reason: None +Dec 17 16:47:43.642 +(APIServer pid=5) INFO 12-17 13:47:43 [logger.py:76] Generated response chatcmpl-71675fb32535413e9a5d721637ecf6ce (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:43.692 +(APIServer pid=5) INFO 12-17 13:47:43 [logger.py:76] Generated response chatcmpl-71675fb32535413e9a5d721637ecf6ce (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:43.739 +(APIServer pid=5) INFO 12-17 13:47:43 [logger.py:76] Generated response chatcmpl-71675fb32535413e9a5d721637ecf6ce (streaming complete): output: 'What is sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:43.829 +POST /v1/chat/completions -> 200 OK (duration: 2.26 s, execution: 2.16 s) +Dec 17 16:47:45.595 +(APIServer pid=5) INFO 12-17 13:47:45 [loggers.py:236] Engine 000: Avg prompt throughput: 21.0 tokens/s, Avg generation throughput: 2.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 86.1%, MM cache hit rate: 94.1% +Dec 17 16:47:47.344 +(APIServer pid=5) INFO 12-17 
13:47:47 [logger.py:47] Received request chatcmpl-09c9320983684e5b8706af8cb9921a20: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:47 [async_llm.py:344] Added request chatcmpl-09c9320983684e5b8706af8cb9921a20. +Dec 17 16:47:47.398 +(APIServer pid=5) INFO 12-17 13:47:47 [logger.py:76] Generated response chatcmpl-09c9320983684e5b8706af8cb9921a20 (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:47.445 +(APIServer pid=5) INFO 12-17 13:47:47 [logger.py:76] Generated response chatcmpl-09c9320983684e5b8706af8cb9921a20 (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:47.495 +(APIServer pid=5) INFO 12-17 13:47:47 [logger.py:76] Generated response chatcmpl-09c9320983684e5b8706af8cb9921a20 (streaming delta): output: ' Sun', output_token_ids: [8059], finish_reason: None +Dec 17 16:47:47.544 +(APIServer pid=5) INFO 12-17 13:47:47 [logger.py:76] Generated response chatcmpl-09c9320983684e5b8706af8cb9921a20 (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:47.590 +(APIServer pid=5) INFO 12-17 13:47:47 [logger.py:76] Generated response chatcmpl-09c9320983684e5b8706af8cb9921a20 (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:47.636 +(APIServer pid=5) INFO 12-17 13:47:47 [logger.py:76] Generated response chatcmpl-09c9320983684e5b8706af8cb9921a20 (streaming complete): 
output: 'What is Sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:47.722 +POST /v1/chat/completions -> 200 OK (duration: 3.05 s, execution: 2.95 s) +Dec 17 16:47:51.776 +(APIServer pid=5) INFO 12-17 13:47:51 [logger.py:47] Received request chatcmpl-f97d13e144674430a77c171824d56465: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:51 [async_llm.py:344] Added request chatcmpl-f97d13e144674430a77c171824d56465. +Dec 17 16:47:51.832 +(APIServer pid=5) INFO 12-17 13:47:51 [logger.py:76] Generated response chatcmpl-f97d13e144674430a77c171824d56465 (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:51.878 +(APIServer pid=5) INFO 12-17 13:47:51 [logger.py:76] Generated response chatcmpl-f97d13e144674430a77c171824d56465 (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:51.924 +(APIServer pid=5) INFO 12-17 13:47:51 [logger.py:76] Generated response chatcmpl-f97d13e144674430a77c171824d56465 (streaming delta): output: ' sun', output_token_ids: [7015], finish_reason: None +Dec 17 16:47:51.973 +(APIServer pid=5) INFO 12-17 13:47:51 [logger.py:76] Generated response chatcmpl-f97d13e144674430a77c171824d56465 (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:52.016 +(APIServer pid=5) INFO 12-17 13:47:52 [logger.py:76] Generated response chatcmpl-f97d13e144674430a77c171824d56465 
(streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:52.062 +(APIServer pid=5) INFO 12-17 13:47:52 [logger.py:76] Generated response chatcmpl-f97d13e144674430a77c171824d56465 (streaming complete): output: 'What is sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:52.130 +POST /v1/chat/completions -> 200 OK (duration: 3.58 s, execution: 3.51 s) +Dec 17 16:47:54.418 +(APIServer pid=5) INFO 12-17 13:47:54 [logger.py:47] Received request chatcmpl-42d7c7a79e434f0098f2cc909d330a92: params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4041, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None), lora_request: None. (APIServer pid=5) INFO: 172.20.2.138:50806 - "POST /v1/chat/completions HTTP/1.1" 200 OK (APIServer pid=5) INFO 12-17 13:47:54 [async_llm.py:344] Added request chatcmpl-42d7c7a79e434f0098f2cc909d330a92. 
+Dec 17 16:47:54.473 +(APIServer pid=5) INFO 12-17 13:47:54 [logger.py:76] Generated response chatcmpl-42d7c7a79e434f0098f2cc909d330a92 (streaming delta): output: 'What', output_token_ids: [3838], finish_reason: None +Dec 17 16:47:54.519 +(APIServer pid=5) INFO 12-17 13:47:54 [logger.py:76] Generated response chatcmpl-42d7c7a79e434f0098f2cc909d330a92 (streaming delta): output: ' is', output_token_ids: [374], finish_reason: None +Dec 17 16:47:54.565 +(APIServer pid=5) INFO 12-17 13:47:54 [logger.py:76] Generated response chatcmpl-42d7c7a79e434f0098f2cc909d330a92 (streaming delta): output: ' sun', output_token_ids: [7015], finish_reason: None +Dec 17 16:47:54.611 +(APIServer pid=5) INFO 12-17 13:47:54 [logger.py:76] Generated response chatcmpl-42d7c7a79e434f0098f2cc909d330a92 (streaming delta): output: 'flower', output_token_ids: [38753], finish_reason: None +Dec 17 16:47:54.657 +(APIServer pid=5) INFO 12-17 13:47:54 [logger.py:76] Generated response chatcmpl-42d7c7a79e434f0098f2cc909d330a92 (streaming delta): output: '?', output_token_ids: [30], finish_reason: None +Dec 17 16:47:54.705 +(APIServer pid=5) INFO 12-17 13:47:54 [logger.py:76] Generated response chatcmpl-42d7c7a79e434f0098f2cc909d330a92 (streaming complete): output: 'What is sunflower?', output_token_ids: None, finish_reason: streaming_complete +Dec 17 16:47:54.790 +POST /v1/chat/completions -> 200 OK (duration: 1.74 s, execution: 1.67 s) +Dec 17 16:47:55.596 +(APIServer pid=5) INFO 12-17 13:47:55 [loggers.py:236] Engine 000: Avg prompt throughput: 21.0 tokens/s, Avg generation throughput: 1.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 86.9%, MM cache hit rate: 95.0% +Dec 17 16:48:05.597 +(APIServer pid=5) INFO 12-17 13:48:05 [loggers.py:236] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 86.9%, MM cache hit rate: 95.0% \ No 
newline at end of file diff --git a/modal-deploy/Sunflower32b-Ultravox/vllm_inference.py b/modal-deploy/Sunflower32b-Ultravox/vllm_inference.py index 26520ba1..a1bd26df 100644 --- a/modal-deploy/Sunflower32b-Ultravox/vllm_inference.py +++ b/modal-deploy/Sunflower32b-Ultravox/vllm_inference.py @@ -70,7 +70,9 @@ def serve(): "--port", str(VLLM_PORT), "--trust-remote-code", - "--max-model-len 4096" + "--max-model-len 4096", + "--enable-log-requests", + # "--enable-log-outputs", ] # enforce-eager disables both Torch compilation and CUDA graph capture diff --git a/modal-deploy/spark-tts-salt/vllm_inference.py b/modal-deploy/spark-tts-salt/vllm_inference.py new file mode 100644 index 00000000..17b28d17 --- /dev/null +++ b/modal-deploy/spark-tts-salt/vllm_inference.py @@ -0,0 +1,219 @@ +# Deploy the Spark-TTS-Salt model with vLLM: +# +# ```shell +# modal deploy vllm_inference.py +# ``` +# +# And query the endpoint with: +# +# ```shell +# curl -X POST --get "https://sb-modal-ws--spark-tts-salt-sparktts-generate.modal.run" \ +# --data-urlencode "text=I am a nurse who takes care of many people who have cancer." \ +# --data-urlencode "speaker_id=248" \ +# --output output.wav +# ``` +# +# You'll receive a WAV file named `output.wav` containing the generated audio. + +import io +import modal +from typing import List + +# ## Define a container image +# We start with Modal's baseline `debian_slim` image and install the required packages. + +image = ( + modal.Image.debian_slim(python_version="3.12") + .apt_install("git") + .uv_pip_install( + "fastapi[standard]", + "einx", + "einops", + "soundfile", + "numpy", + "torch", + "librosa", + "vllm==0.12.0", + "omegaconf", + "huggingface_hub", + ) + .run_commands("git clone https://github.com/SparkAudio/Spark-TTS /root/Spark-TTS") + .env({"PYTHONPATH": "/root/Spark-TTS"}) +) +app = modal.App("spark-tts-salt", image=image) + +# Import the required libraries within the image context to ensure they're available +# when the container runs. 
# This includes audio processing and the TTS model itself.

with image.imports():
    import re
    import time

    import numpy as np
    import torch
    import soundfile as sf
    from typing import List
    from vllm import LLM
    from vllm.sampling_params import SamplingParams
    from huggingface_hub import snapshot_download
    from fastapi.responses import StreamingResponse
    from sparktts.models.audio_tokenizer import BiCodecTokenizer

# Cache model weights with Modal Volumes so cold starts can reuse prior downloads.
hf_cache_vol = modal.Volume.from_name("huggingface-cache", create_if_missing=True)
HF_CACHE_DIR = "/root/.cache/huggingface"


# ## The TTS model class

# The TTS service is implemented using Modal's class syntax with GPU acceleration.

# - `scaledown_window=60 * 3`: Keep containers alive for 3 minutes after last request
# - `enable_memory_snapshot=True`: Enable [memory snapshots](https://modal.com/docs/guide/memory-snapshot) to optimize cold boot times
# - `@modal.concurrent(max_inputs=10)`: Allow up to 10 concurrent requests per container

@app.cls(
    gpu="L4",
    scaledown_window=60 * 3,
    enable_memory_snapshot=True,
    secrets=[modal.Secret.from_name("huggingface-read")],
    volumes={
        HF_CACHE_DIR: hf_cache_vol,
    }
)
@modal.concurrent(max_inputs=10)
class SparkTTS:
    """Spark-TTS text-to-speech service.

    `generate` turns text into a streamed WAV: the LLM predicts BiCodec
    semantic tokens per sentence, and the audio tokenizer decodes them —
    together with a fixed per-speaker "global token" sequence — into audio.
    """

    # Fixed BiCodec global-token sequences, one per supported speaker id:
    # 241: Acholi (female)
    # 242: Ateso (female)
    # 243: Runyankore (female)
    # 245: Lugbara (female)
    # 246: Swahili (male)
    # 248: Luganda (female)
    GLOBAL_IDS_BY_SPEAKER = {
        241: [1755, 1265, 184, 3545, 2718, 2405, 3237, 1360, 3621, 1850, 37, 3382, 736,
              3380, 3131, 2036, 244, 2128, 254, 2550, 3181, 764, 1277, 502, 2941, 1993,
              3556, 1428, 3505, 3245, 3506, 1540],
        242: [1367, 1522, 308, 4061, 1449, 2468, 2193, 1349, 3458, 2339, 1651, 3174,
              501, 3364, 3194, 2041, 442, 1061, 502, 2234, 2397, 358, 3829, 2490, 2031,
              1002, 3548, 586, 3445, 1419, 4093, 2908],
        243: [2051, 242, 2684, 4062, 2654, 2252, 353, 3657, 2759, 3254, 1649, 3366,
              1017, 3600, 3131, 3813, 1535, 1595, 1059, 237, 2158, 1174, 4085, 2174,
              3791, 990, 3274, 2693, 3829, 2271, 2650, 1689],
        245: [2031, 2545, 116, 4060, 746, 1385, 3301, 1312, 3638, 1846, 85, 3190, 1016,
              3384, 3134, 954, 244, 1104, 235, 2549, 3357, 508, 1278, 1974, 2621, 1896,
              3812, 2185, 3061, 2941, 1187, 5],
        246: [1811, 1138, 2873, 3309, 2639, 723, 3363, 974, 1612, 2531, 1769, 3376,
              933, 3848, 3195, 2180, 2359, 1275, 3493, 3260, 2279, 3715, 3508, 2433,
              4082, 1087, 3545, 1449, 160, 3531, 2908, 2094],
        248: [2559, 1523, 440, 3789, 1438, 373, 2212, 1248, 3369, 1847, 36, 3126, 480,
              3380, 3133, 2041, 248, 2384, 730, 2554, 3182, 1785, 1277, 1013, 2425,
              1932, 3560, 1177, 2736, 2430, 2722, 261]
    }

    @modal.enter()
    def load(self):
        """Load the vLLM model and the BiCodec audio tokenizer once per container."""
        print("Loading Spark TTS model...")
        self.model = LLM(
            "Sunbird/spark-tts-salt",
            enforce_eager=False,
            gpu_memory_utilization=0.5)  # Leave some VRAM for the audio tokeniser
        print("✅ Model loaded successfully!")

        # Download tokenizer model files (skip the LLM weights — vLLM fetched those)
        model_base_repo = "unsloth/Spark-TTS-0.5B"
        print(f"Downloading tokenizer files from {model_base_repo}...")
        snapshot_download(
            repo_id=model_base_repo,
            local_dir=HF_CACHE_DIR,
            ignore_patterns=["*LLM*"],  # Skip LLM files, we only need tokenizer
        )
        print(f"✅ Tokenizer files downloaded to {HF_CACHE_DIR}")

        # Initialize the audio tokenizer
        print("Initializing audio tokenizer...")
        self.audio_tokenizer = BiCodecTokenizer(HF_CACHE_DIR)
        self.audio_tokenizer.model.to('cuda')
        print("✅ Audio tokenizer initialized!")

    @modal.fastapi_endpoint(docs=True, method="POST")
    def generate(self, text: str, speaker_id: int = 241, temperature: float = 0.6):
        """Synthesize `text` in the voice of `speaker_id` and stream it back as WAV.

        Args:
            text: Input text; split into sentences, one model prompt each.
            speaker_id: Key into GLOBAL_IDS_BY_SPEAKER selecting the voice.
            temperature: Sampling temperature for the token LLM.

        Returns:
            StreamingResponse with `audio/wav` content.

        Raises:
            ValueError: if `speaker_id` is unknown, `text` has no non-empty
                sentences, or the model emits no semantic tokens.
        """
        start_time = time.time()
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {device}")

        # Fail fast with a clear message instead of a bare KeyError mid-request.
        if speaker_id not in self.GLOBAL_IDS_BY_SPEAKER:
            raise ValueError(
                f"Unknown speaker_id {speaker_id}; "
                f"expected one of {sorted(self.GLOBAL_IDS_BY_SPEAKER)}"
            )
        global_tokens = self.GLOBAL_IDS_BY_SPEAKER[speaker_id]

        sentences = [s.strip() for s in self.chunk_text_simple(text) if s.strip()]
        if not sentences:
            # Without this guard, np.concatenate([]) below fails with a
            # confusing "need at least one array" error.
            raise ValueError("No non-empty sentences found in the input text.")

        sampling_params = SamplingParams(temperature=temperature, max_tokens=2048)

        # One prompt per sentence; the global-token block selects the voice.
        # (Loop variable renamed from `text` — the original shadowed the parameter.)
        global_block = (
            ''.join(f'<|bicodec_global_{t}|>' for t in global_tokens)
            + '<|end_global_token|><|start_semantic_token|>'
        )
        prompts = [
            f"<|task_tts|><|start_content|>{speaker_id}: {sentence}<|end_content|>"
            f"<|start_global_token|>{global_block}"
            for sentence in sentences
        ]

        gen_start = time.time()
        outputs = self.model.generate(
            prompts=prompts,
            sampling_params=sampling_params
        )
        print(f"Model generation time: {time.time() - gen_start:.2f}s")

        decode_start = time.time()
        # Loop-invariant: the same global ids decode every segment.
        pred_global_ids = torch.tensor([global_tokens], dtype=torch.long)

        speech_segments = []
        for output in outputs:
            predicted_tokens = output.outputs[0].text
            semantic_matches = re.findall(r"<\|bicodec_semantic_(\d+)\|>", predicted_tokens)
            if not semantic_matches:
                raise ValueError("No semantic tokens found in the generated output.")

            pred_semantic_ids = torch.tensor(
                [int(token) for token in semantic_matches], dtype=torch.long
            ).unsqueeze(0)

            wav_np = self.audio_tokenizer.detokenize(
                pred_global_ids.to(device), pred_semantic_ids.to(device)
            )
            speech_segments.append(wav_np)

        result_wav = np.concatenate(speech_segments)
        print(f"Audio decoding time: {time.time() - decode_start:.2f}s")

        save_start = time.time()
        # Serialize to WAV in memory at the tokenizer's native sample rate.
        buffer = io.BytesIO()
        sf.write(buffer, result_wav, self.audio_tokenizer.config["sample_rate"], format='WAV')
        # Reset buffer position to the beginning for reading
        buffer.seek(0)
        print(f"Audio saving time: {time.time() - save_start:.2f}s")

        print(f"Total generation time: {time.time() - start_time:.2f}s")
        # Return the audio as a streaming response with appropriate MIME type.
        # This allows for browsers to playback audio directly.
        return StreamingResponse(
            buffer,
            media_type="audio/wav",
        )

    def chunk_text_simple(self, text: str) -> List[str]:
        """
        Split text into individual sentences.

        Recommended for TTS - provides maximum control with one sentence per chunk.

        Args:
            text: The input string to chunk

        Returns:
            List of individual sentences
        """
        sentences = re.split(r'(?<=[.!?])\s+', text.strip())
        return [s.strip() for s in sentences if s.strip()]