Skip to content

Commit 04504d8

Browse files
authored
Forward Rate limits to the UI (#3965)
We currently receive rate-limit information in the response headers. We want to forward it to the clients to provide better transparency. UI/UX plans have been discussed, and this information is needed for them.
1 parent 42d335d commit 04504d8

File tree

6 files changed

+192
-12
lines changed

6 files changed

+192
-12
lines changed

codex-rs/core/src/chat_completions.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,9 @@ where
716716
// Not an assistant message – forward immediately.
717717
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
718718
}
719+
Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot)))) => {
720+
return Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot))));
721+
}
719722
Poll::Ready(Some(Ok(ResponseEvent::Completed {
720723
response_id,
721724
token_usage,

codex-rs/core/src/client.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use eventsource_stream::Eventsource;
1111
use futures::prelude::*;
1212
use regex_lite::Regex;
1313
use reqwest::StatusCode;
14+
use reqwest::header::HeaderMap;
1415
use serde::Deserialize;
1516
use serde::Serialize;
1617
use serde_json::Value;
@@ -40,6 +41,7 @@ use crate::model_provider_info::ModelProviderInfo;
4041
use crate::model_provider_info::WireApi;
4142
use crate::openai_model_info::get_model_info;
4243
use crate::openai_tools::create_tools_json_for_responses_api;
44+
use crate::protocol::RateLimitSnapshotEvent;
4345
use crate::protocol::TokenUsage;
4446
use crate::token_data::PlanType;
4547
use crate::util::backoff;
@@ -274,6 +276,15 @@ impl ModelClient {
274276
Ok(resp) if resp.status().is_success() => {
275277
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
276278

279+
if let Some(snapshot) = parse_rate_limit_snapshot(resp.headers())
280+
&& tx_event
281+
.send(Ok(ResponseEvent::RateLimits(snapshot)))
282+
.await
283+
.is_err()
284+
{
285+
debug!("receiver dropped rate limit snapshot event");
286+
}
287+
277288
// spawn task to process SSE
278289
let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
279290
tokio::spawn(process_sse(
@@ -473,6 +484,38 @@ fn attach_item_ids(payload_json: &mut Value, original_items: &[ResponseItem]) {
473484
}
474485
}
475486

487+
fn parse_rate_limit_snapshot(headers: &HeaderMap) -> Option<RateLimitSnapshotEvent> {
488+
let primary_used_percent = parse_header_f64(headers, "x-codex-primary-used-percent")?;
489+
let weekly_used_percent = parse_header_f64(headers, "x-codex-protection-used-percent")?;
490+
let primary_to_weekly_ratio_percent =
491+
parse_header_f64(headers, "x-codex-primary-over-protection-limit-percent")?;
492+
let primary_window_minutes = parse_header_u64(headers, "x-codex-primary-window-minutes")?;
493+
let weekly_window_minutes = parse_header_u64(headers, "x-codex-protection-window-minutes")?;
494+
495+
Some(RateLimitSnapshotEvent {
496+
primary_used_percent,
497+
weekly_used_percent,
498+
primary_to_weekly_ratio_percent,
499+
primary_window_minutes,
500+
weekly_window_minutes,
501+
})
502+
}
503+
504+
fn parse_header_f64(headers: &HeaderMap, name: &str) -> Option<f64> {
505+
parse_header_str(headers, name)?
506+
.parse::<f64>()
507+
.ok()
508+
.filter(|v| v.is_finite())
509+
}
510+
511+
fn parse_header_u64(headers: &HeaderMap, name: &str) -> Option<u64> {
512+
parse_header_str(headers, name)?.parse::<u64>().ok()
513+
}
514+
515+
fn parse_header_str<'a>(headers: &'a HeaderMap, name: &str) -> Option<&'a str> {
516+
headers.get(name)?.to_str().ok()
517+
}
518+
476519
async fn process_sse<S>(
477520
stream: S,
478521
tx_event: mpsc::Sender<Result<ResponseEvent>>,

codex-rs/core/src/client_common.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::error::Result;
22
use crate::model_family::ModelFamily;
33
use crate::openai_tools::OpenAiTool;
4+
use crate::protocol::RateLimitSnapshotEvent;
45
use crate::protocol::TokenUsage;
56
use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
67
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
@@ -78,6 +79,7 @@ pub enum ResponseEvent {
7879
WebSearchCallBegin {
7980
call_id: String,
8081
},
82+
RateLimits(RateLimitSnapshotEvent),
8183
}
8284

8385
#[derive(Debug, Serialize)]

codex-rs/core/src/codex.rs

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,15 @@ use crate::protocol::ListCustomPromptsResponseEvent;
9898
use crate::protocol::Op;
9999
use crate::protocol::PatchApplyBeginEvent;
100100
use crate::protocol::PatchApplyEndEvent;
101+
use crate::protocol::RateLimitSnapshotEvent;
101102
use crate::protocol::ReviewDecision;
102103
use crate::protocol::ReviewOutputEvent;
103104
use crate::protocol::SandboxPolicy;
104105
use crate::protocol::SessionConfiguredEvent;
105106
use crate::protocol::StreamErrorEvent;
106107
use crate::protocol::Submission;
107108
use crate::protocol::TaskCompleteEvent;
109+
use crate::protocol::TokenCountEvent;
108110
use crate::protocol::TokenUsage;
109111
use crate::protocol::TokenUsageInfo;
110112
use crate::protocol::TurnDiffEvent;
@@ -257,6 +259,7 @@ struct State {
257259
pending_input: Vec<ResponseInputItem>,
258260
history: ConversationHistory,
259261
token_info: Option<TokenUsageInfo>,
262+
latest_rate_limits: Option<RateLimitSnapshotEvent>,
260263
}
261264

262265
/// Context for an initialized model agent
@@ -738,16 +741,30 @@ impl Session {
738741
async fn update_token_usage_info(
739742
&self,
740743
turn_context: &TurnContext,
741-
token_usage: &Option<TokenUsage>,
742-
) -> Option<TokenUsageInfo> {
744+
token_usage: Option<&TokenUsage>,
745+
) {
743746
let mut state = self.state.lock().await;
744-
let info = TokenUsageInfo::new_or_append(
745-
&state.token_info,
746-
token_usage,
747-
turn_context.client.get_model_context_window(),
748-
);
749-
state.token_info = info.clone();
750-
info
747+
if let Some(token_usage) = token_usage {
748+
let info = TokenUsageInfo::new_or_append(
749+
&state.token_info,
750+
&Some(token_usage.clone()),
751+
turn_context.client.get_model_context_window(),
752+
);
753+
state.token_info = info;
754+
}
755+
}
756+
757+
async fn update_rate_limits(&self, new_rate_limits: RateLimitSnapshotEvent) {
758+
let mut state = self.state.lock().await;
759+
state.latest_rate_limits = Some(new_rate_limits);
760+
}
761+
762+
async fn get_token_count_event(&self) -> TokenCountEvent {
763+
let state = self.state.lock().await;
764+
TokenCountEvent {
765+
info: state.token_info.clone(),
766+
rate_limits: state.latest_rate_limits.clone(),
767+
}
751768
}
752769

753770
/// Record a user input item to conversation history and also persist a
@@ -2136,17 +2153,22 @@ async fn try_run_turn(
21362153
})
21372154
.await;
21382155
}
2156+
ResponseEvent::RateLimits(snapshot) => {
2157+
// Update internal state with latest rate limits, but defer sending until
2158+
// token usage is available to avoid duplicate TokenCount events.
2159+
sess.update_rate_limits(snapshot).await;
2160+
}
21392161
ResponseEvent::Completed {
21402162
response_id: _,
21412163
token_usage,
21422164
} => {
2143-
let info = sess
2144-
.update_token_usage_info(turn_context, &token_usage)
2165+
sess.update_token_usage_info(turn_context, token_usage.as_ref())
21452166
.await;
2167+
let token_event = sess.get_token_count_event().await;
21462168
let _ = sess
21472169
.send_event(Event {
21482170
id: sub_id.to_string(),
2149-
msg: EventMsg::TokenCount(crate::protocol::TokenCountEvent { info }),
2171+
msg: EventMsg::TokenCount(token_event),
21502172
})
21512173
.await;
21522174

codex-rs/core/tests/suite/client.rs

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use codex_protocol::models::ReasoningItemReasoningSummary;
2222
use codex_protocol::models::WebSearchAction;
2323
use core_test_support::load_default_config_for_test;
2424
use core_test_support::load_sse_fixture_with_id;
25+
use core_test_support::responses;
2526
use core_test_support::wait_for_event;
2627
use futures::StreamExt;
2728
use serde_json::json;
@@ -776,6 +777,100 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
776777
assert_eq!(body["input"][5]["id"].as_str(), Some("custom-tool-id"));
777778
}
778779

780+
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
781+
async fn token_count_includes_rate_limits_snapshot() {
782+
let server = MockServer::start().await;
783+
784+
let sse_body = responses::sse(vec![responses::ev_completed_with_tokens("resp_rate", 123)]);
785+
786+
let response = ResponseTemplate::new(200)
787+
.insert_header("content-type", "text/event-stream")
788+
.insert_header("x-codex-primary-used-percent", "12.5")
789+
.insert_header("x-codex-protection-used-percent", "40.0")
790+
.insert_header("x-codex-primary-over-protection-limit-percent", "75.0")
791+
.insert_header("x-codex-primary-window-minutes", "10")
792+
.insert_header("x-codex-protection-window-minutes", "60")
793+
.set_body_raw(sse_body, "text/event-stream");
794+
795+
Mock::given(method("POST"))
796+
.and(path("/v1/responses"))
797+
.respond_with(response)
798+
.expect(1)
799+
.mount(&server)
800+
.await;
801+
802+
let mut provider = built_in_model_providers()["openai"].clone();
803+
provider.base_url = Some(format!("{}/v1", server.uri()));
804+
805+
let home = TempDir::new().unwrap();
806+
let mut config = load_default_config_for_test(&home);
807+
config.model_provider = provider;
808+
809+
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("test"));
810+
let codex = conversation_manager
811+
.new_conversation(config)
812+
.await
813+
.expect("create conversation")
814+
.conversation;
815+
816+
codex
817+
.submit(Op::UserInput {
818+
items: vec![InputItem::Text {
819+
text: "hello".into(),
820+
}],
821+
})
822+
.await
823+
.unwrap();
824+
825+
let token_event = wait_for_event(&codex, |msg| matches!(msg, EventMsg::TokenCount(_))).await;
826+
let final_payload = match token_event {
827+
EventMsg::TokenCount(ev) => ev,
828+
_ => unreachable!(),
829+
};
830+
// Assert full JSON for the final token count event (usage + rate limits)
831+
let final_json = serde_json::to_value(&final_payload).unwrap();
832+
pretty_assertions::assert_eq!(
833+
final_json,
834+
json!({
835+
"info": {
836+
"total_token_usage": {
837+
"input_tokens": 123,
838+
"cached_input_tokens": 0,
839+
"output_tokens": 0,
840+
"reasoning_output_tokens": 0,
841+
"total_tokens": 123
842+
},
843+
"last_token_usage": {
844+
"input_tokens": 123,
845+
"cached_input_tokens": 0,
846+
"output_tokens": 0,
847+
"reasoning_output_tokens": 0,
848+
"total_tokens": 123
849+
},
850+
// Default model is gpt-5 in tests → 272000 context window
851+
"model_context_window": 272000
852+
},
853+
"rate_limits": {
854+
"primary_used_percent": 12.5,
855+
"weekly_used_percent": 40.0,
856+
"primary_to_weekly_ratio_percent": 75.0,
857+
"primary_window_minutes": 10,
858+
"weekly_window_minutes": 60
859+
}
860+
})
861+
);
862+
let usage = final_payload
863+
.info
864+
.expect("token usage info should be recorded after completion");
865+
assert_eq!(usage.total_token_usage.total_tokens, 123);
866+
let final_snapshot = final_payload
867+
.rate_limits
868+
.expect("latest rate limit snapshot should be retained");
869+
assert_eq!(final_snapshot.primary_used_percent, 12.5);
870+
871+
wait_for_event(&codex, |msg| matches!(msg, EventMsg::TaskComplete(_))).await;
872+
}
873+
779874
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
780875
async fn azure_overrides_assign_properties_used_for_responses_url() {
781876
let existing_env_var_with_random_value = if cfg!(windows) { "USERNAME" } else { "USER" };

codex-rs/protocol/src/protocol.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,21 @@ impl TokenUsageInfo {
589589
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
590590
pub struct TokenCountEvent {
591591
pub info: Option<TokenUsageInfo>,
592+
pub rate_limits: Option<RateLimitSnapshotEvent>,
593+
}
594+
595+
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
596+
pub struct RateLimitSnapshotEvent {
597+
/// Percentage (0-100) of the primary window that has been consumed.
598+
pub primary_used_percent: f64,
599+
/// Percentage (0-100) of the protection window that has been consumed.
600+
pub weekly_used_percent: f64,
601+
/// Size of the primary window relative to weekly (0-100).
602+
pub primary_to_weekly_ratio_percent: f64,
603+
/// Rolling window duration for the primary limit, in minutes.
604+
pub primary_window_minutes: u64,
605+
/// Rolling window duration for the weekly limit, in minutes.
606+
pub weekly_window_minutes: u64,
592607
}
593608

594609
// Includes prompts, tools and space to call compact.

0 commit comments

Comments
 (0)