Skip to content

Commit 67aab04

Browse files
authored
[codex exec] Add item.started and support it for command execution (#4250)
Adds a new `item.started` event to `codex exec` and implements it for the `command_execution` item type.

```jsonl
{"type":"session.created","session_id":"019982d1-75f0-7920-b051-e0d3731a5ed8"}
{"type":"item.completed","item":{"id":"item_0","item_type":"reasoning","text":"**Executing commands securely**\n\nI'm thinking about how the default harness typically uses \"bash -lc,\" while historically \"bash\" is what we've been using. The command should be executed as a string in our CLI, so using \"bash -lc 'echo hello'\" is optimal but calling \"echo hello\" directly feels safer. The sandbox makes sure environment variables like CODEX_SANDBOX_NETWORK_DISABLED=1 are set, so I won't ask for approval. I just need to run \"echo hello\" and correctly present the output."}}
{"type":"item.completed","item":{"id":"item_1","item_type":"reasoning","text":"**Preparing for tool calls**\n\nI realize that I need to include a preamble before making any tool calls. So, I'll first state the preamble in the commentary channel, then proceed with the tool call. After that, I need to present the final message along with the output. It's possible that the CLI will show the output inline, but I must ensure that I present the result clearly regardless. Let's move forward and get this organized!"}}
{"type":"item.completed","item":{"id":"item_2","item_type":"assistant_message","text":"Running `echo` to confirm shell access and print output."}}
{"type":"item.started","item":{"id":"item_3","item_type":"command_execution","command":"bash -lc echo hello","aggregated_output":"","exit_code":null,"status":"in_progress"}}
{"type":"item.completed","item":{"id":"item_3","item_type":"command_execution","command":"bash -lc echo hello","aggregated_output":"hello\n","exit_code":0,"status":"completed"}}
{"type":"item.completed","item":{"id":"item_4","item_type":"assistant_message","text":"hello"}}
```
1 parent 7355ca4 commit 67aab04

File tree

3 files changed

+115
-18
lines changed

3 files changed

+115
-18
lines changed

codex-rs/exec/src/exec_events.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ use ts_rs::TS;
88
pub enum ConversationEvent {
99
#[serde(rename = "session.created")]
1010
SessionCreated(SessionCreatedEvent),
11+
#[serde(rename = "item.started")]
12+
ItemStarted(ItemStartedEvent),
1113
#[serde(rename = "item.completed")]
1214
ItemCompleted(ItemCompletedEvent),
1315
#[serde(rename = "error")]
@@ -20,6 +22,12 @@ pub struct SessionCreatedEvent {
2022
pub session_id: String,
2123
}
2224

25+
/// Payload describing the start of a new conversation item.
26+
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
27+
pub struct ItemStartedEvent {
28+
pub item: ConversationItem,
29+
}
30+
2331
/// Payload describing the completion of an existing conversation item.
2432
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
2533
pub struct ItemCompletedEvent {
@@ -85,7 +93,8 @@ pub enum CommandExecutionStatus {
8593
pub struct CommandExecutionItem {
8694
pub command: String,
8795
pub aggregated_output: String,
88-
pub exit_code: i32,
96+
#[serde(skip_serializing_if = "Option::is_none")]
97+
pub exit_code: Option<i32>,
8998
pub status: CommandExecutionStatus,
9099
}
91100

codex-rs/exec/src/experimental_event_processor_with_json_output.rs

Lines changed: 51 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use crate::exec_events::ConversationItemDetails;
1515
use crate::exec_events::FileChangeItem;
1616
use crate::exec_events::FileUpdateChange;
1717
use crate::exec_events::ItemCompletedEvent;
18+
use crate::exec_events::ItemStartedEvent;
1819
use crate::exec_events::PatchApplyStatus;
1920
use crate::exec_events::PatchChangeKind;
2021
use crate::exec_events::ReasoningItem;
@@ -32,14 +33,22 @@ use codex_core::protocol::PatchApplyEndEvent;
3233
use codex_core::protocol::SessionConfiguredEvent;
3334
use codex_core::protocol::TaskCompleteEvent;
3435
use tracing::error;
36+
use tracing::warn;
3537

3638
pub struct ExperimentalEventProcessorWithJsonOutput {
3739
last_message_path: Option<PathBuf>,
3840
next_event_id: AtomicU64,
39-
running_commands: HashMap<String, Vec<String>>,
41+
// Tracks running commands by call_id, including the associated item id.
42+
running_commands: HashMap<String, RunningCommand>,
4043
running_patch_applies: HashMap<String, PatchApplyBeginEvent>,
4144
}
4245

46+
#[derive(Debug, Clone)]
47+
struct RunningCommand {
48+
command: String,
49+
item_id: String,
50+
}
51+
4352
impl ExperimentalEventProcessorWithJsonOutput {
4453
pub fn new(last_message_path: Option<PathBuf>) -> Self {
4554
Self {
@@ -114,10 +123,38 @@ impl ExperimentalEventProcessorWithJsonOutput {
114123
})]
115124
}
116125
fn handle_exec_command_begin(&mut self, ev: &ExecCommandBeginEvent) -> Vec<ConversationEvent> {
117-
self.running_commands
118-
.insert(ev.call_id.clone(), ev.command.clone());
126+
let item_id = self.get_next_item_id();
119127

120-
Vec::new()
128+
let command_string = match shlex::try_join(ev.command.iter().map(String::as_str)) {
129+
Ok(command_string) => command_string,
130+
Err(e) => {
131+
warn!(
132+
call_id = ev.call_id,
133+
"Failed to stringify command: {e:?}; falling back to space-joined command"
134+
);
135+
ev.command.join(" ")
136+
}
137+
};
138+
139+
self.running_commands.insert(
140+
ev.call_id.clone(),
141+
RunningCommand {
142+
command: command_string.clone(),
143+
item_id: item_id.clone(),
144+
},
145+
);
146+
147+
let item = ConversationItem {
148+
id: item_id,
149+
details: ConversationItemDetails::CommandExecution(CommandExecutionItem {
150+
command: command_string,
151+
aggregated_output: String::new(),
152+
exit_code: None,
153+
status: CommandExecutionStatus::InProgress,
154+
}),
155+
};
156+
157+
vec![ConversationEvent::ItemStarted(ItemStartedEvent { item })]
121158
}
122159

123160
fn handle_patch_apply_begin(&mut self, ev: &PatchApplyBeginEvent) -> Vec<ConversationEvent> {
@@ -167,23 +204,26 @@ impl ExperimentalEventProcessorWithJsonOutput {
167204
}
168205

169206
fn handle_exec_command_end(&mut self, ev: &ExecCommandEndEvent) -> Vec<ConversationEvent> {
170-
let command = self
171-
.running_commands
172-
.remove(&ev.call_id)
173-
.map(|command| command.join(" "))
174-
.unwrap_or_default();
207+
let Some(RunningCommand { command, item_id }) = self.running_commands.remove(&ev.call_id)
208+
else {
209+
warn!(
210+
call_id = ev.call_id,
211+
"ExecCommandEnd without matching ExecCommandBegin; skipping item.completed"
212+
);
213+
return Vec::new();
214+
};
175215
let status = if ev.exit_code == 0 {
176216
CommandExecutionStatus::Completed
177217
} else {
178218
CommandExecutionStatus::Failed
179219
};
180220
let item = ConversationItem {
181-
id: self.get_next_item_id(),
221+
id: item_id,
182222

183223
details: ConversationItemDetails::CommandExecution(CommandExecutionItem {
184224
command,
185225
aggregated_output: ev.aggregated_output.clone(),
186-
exit_code: ev.exit_code,
226+
exit_code: Some(ev.exit_code),
187227
status,
188228
}),
189229
};

codex-rs/exec/tests/event_processor_with_json_output.rs

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use codex_exec::exec_events::ConversationEvent;
1616
use codex_exec::exec_events::ConversationItem;
1717
use codex_exec::exec_events::ConversationItemDetails;
1818
use codex_exec::exec_events::ItemCompletedEvent;
19+
use codex_exec::exec_events::ItemStartedEvent;
1920
use codex_exec::exec_events::PatchApplyStatus;
2021
use codex_exec::exec_events::PatchChangeKind;
2122
use codex_exec::exec_events::ReasoningItem;
@@ -156,7 +157,20 @@ fn exec_command_end_success_produces_completed_command_item() {
156157
}),
157158
);
158159
let out_begin = ep.collect_conversation_events(&begin);
159-
assert!(out_begin.is_empty());
160+
assert_eq!(
161+
out_begin,
162+
vec![ConversationEvent::ItemStarted(ItemStartedEvent {
163+
item: ConversationItem {
164+
id: "item_0".to_string(),
165+
details: ConversationItemDetails::CommandExecution(CommandExecutionItem {
166+
command: "bash -lc 'echo hi'".to_string(),
167+
aggregated_output: String::new(),
168+
exit_code: None,
169+
status: CommandExecutionStatus::InProgress,
170+
}),
171+
},
172+
})]
173+
);
160174

161175
// End (success) -> item.completed (item_0)
162176
let end_ok = event(
@@ -178,9 +192,9 @@ fn exec_command_end_success_produces_completed_command_item() {
178192
item: ConversationItem {
179193
id: "item_0".to_string(),
180194
details: ConversationItemDetails::CommandExecution(CommandExecutionItem {
181-
command: "bash -lc echo hi".to_string(),
195+
command: "bash -lc 'echo hi'".to_string(),
182196
aggregated_output: "hi\n".to_string(),
183-
exit_code: 0,
197+
exit_code: Some(0),
184198
status: CommandExecutionStatus::Completed,
185199
}),
186200
},
@@ -202,7 +216,20 @@ fn exec_command_end_failure_produces_failed_command_item() {
202216
parsed_cmd: Vec::new(),
203217
}),
204218
);
205-
assert!(ep.collect_conversation_events(&begin).is_empty());
219+
assert_eq!(
220+
ep.collect_conversation_events(&begin),
221+
vec![ConversationEvent::ItemStarted(ItemStartedEvent {
222+
item: ConversationItem {
223+
id: "item_0".to_string(),
224+
details: ConversationItemDetails::CommandExecution(CommandExecutionItem {
225+
command: "sh -c 'exit 1'".to_string(),
226+
aggregated_output: String::new(),
227+
exit_code: None,
228+
status: CommandExecutionStatus::InProgress,
229+
}),
230+
},
231+
})]
232+
);
206233

207234
// End (failure) -> item.completed (item_0)
208235
let end_fail = event(
@@ -224,16 +251,37 @@ fn exec_command_end_failure_produces_failed_command_item() {
224251
item: ConversationItem {
225252
id: "item_0".to_string(),
226253
details: ConversationItemDetails::CommandExecution(CommandExecutionItem {
227-
command: "sh -c exit 1".to_string(),
254+
command: "sh -c 'exit 1'".to_string(),
228255
aggregated_output: String::new(),
229-
exit_code: 1,
256+
exit_code: Some(1),
230257
status: CommandExecutionStatus::Failed,
231258
}),
232259
},
233260
})]
234261
);
235262
}
236263

264+
#[test]
265+
fn exec_command_end_without_begin_is_ignored() {
266+
let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
267+
268+
// End event arrives without a prior Begin; should produce no conversation events.
269+
let end_only = event(
270+
"c1",
271+
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
272+
call_id: "no-begin".to_string(),
273+
stdout: String::new(),
274+
stderr: String::new(),
275+
aggregated_output: String::new(),
276+
exit_code: 0,
277+
duration: Duration::from_millis(1),
278+
formatted_output: String::new(),
279+
}),
280+
);
281+
let out = ep.collect_conversation_events(&end_only);
282+
assert!(out.is_empty());
283+
}
284+
237285
#[test]
238286
fn patch_apply_success_produces_item_completed_patchapply() {
239287
let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);

0 commit comments

Comments
 (0)