diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 0ecada0fb3c..b18930f3f9f 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1054,6 +1054,7 @@ dependencies = [ "codex-otel", "codex-protocol", "codex-rmcp-client", + "codex-utils-pty", "codex-utils-string", "core-foundation 0.9.4", "core_test_support", @@ -1070,7 +1071,6 @@ dependencies = [ "mcp-types", "openssl-sys", "os_info", - "portable-pty", "predicates", "pretty_assertions", "rand 0.9.2", @@ -1481,6 +1481,15 @@ dependencies = [ "toml", ] +[[package]] +name = "codex-utils-pty" +version = "0.0.0" +dependencies = [ + "anyhow", + "portable-pty", + "tokio", +] + [[package]] name = "codex-utils-readiness" version = "0.0.0" diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml index 5286ef8702a..c0493c39d6a 100644 --- a/codex-rs/Cargo.toml +++ b/codex-rs/Cargo.toml @@ -35,6 +35,7 @@ members = [ "git-apply", "utils/json-to-toml", "utils/readiness", + "utils/pty", "utils/string", ] resolver = "2" @@ -78,6 +79,7 @@ codex-stdio-to-uds = { path = "stdio-to-uds" } codex-tui = { path = "tui" } codex-utils-json-to-toml = { path = "utils/json-to-toml" } codex-utils-readiness = { path = "utils/readiness" } +codex-utils-pty = { path = "utils/pty" } codex-utils-string = { path = "utils/string" } core_test_support = { path = "core/tests/common" } mcp-types = { path = "mcp-types" } diff --git a/codex-rs/app-server/src/codex_message_processor.rs b/codex-rs/app-server/src/codex_message_processor.rs index 40e6a45fa35..0d9fe4b0891 100644 --- a/codex-rs/app-server/src/codex_message_processor.rs +++ b/codex-rs/app-server/src/codex_message_processor.rs @@ -631,6 +631,7 @@ impl CodexMessageProcessor { env, with_escalated_permissions: None, justification: None, + arg0: None, }; let effective_policy = params diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml index d13205699e1..fdc1136f08e 100644 --- a/codex-rs/core/Cargo.toml +++ b/codex-rs/core/Cargo.toml @@ -28,6 +28,7 @@ codex-protocol = { workspace = true } codex-rmcp-client = { workspace = true } codex-async-utils = { workspace = true } codex-utils-string = { workspace = true } +codex-utils-pty = { workspace = true } dirs = { workspace = true } dunce = { workspace = true } env-flags = { workspace = true } @@ -37,7 +38,6 @@ indexmap = { workspace = true } libc = { workspace = true } mcp-types = { workspace = true } os_info = { workspace = true } -portable-pty = { workspace = true } rand = { workspace = true } regex-lite = { workspace = true } reqwest = { workspace = true, features = ["json", "stream"] } diff --git a/codex-rs/core/src/chat_completions.rs b/codex-rs/core/src/chat_completions.rs index de0b9d41513..aeea0129488 100644 --- a/codex-rs/core/src/chat_completions.rs +++ b/codex-rs/core/src/chat_completions.rs @@ -11,7 +11,7 @@ use crate::error::Result; use crate::error::RetryLimitReachedError; use crate::error::UnexpectedResponseError; use crate::model_family::ModelFamily; -use crate::openai_tools::create_tools_json_for_chat_completions_api; +use crate::tools::spec::create_tools_json_for_chat_completions_api; use crate::util::backoff; use bytes::Bytes; use codex_otel::otel_event_manager::OtelEventManager; diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index b215106d2b4..4143a734b54 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -45,12 +45,12 @@ use crate::model_family::ModelFamily; use crate::model_provider_info::ModelProviderInfo; use crate::model_provider_info::WireApi; use 
crate::openai_model_info::get_model_info; -use crate::openai_tools::create_tools_json_for_responses_api; use crate::protocol::RateLimitSnapshot; use crate::protocol::RateLimitWindow; use crate::protocol::TokenUsage; use crate::state::TaskKind; use crate::token_data::PlanType; +use crate::tools::spec::create_tools_json_for_responses_api; use crate::util::backoff; use chrono::DateTime; use chrono::Utc; diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs index d30da5192a9..397b09dd7c2 100644 --- a/codex-rs/core/src/client_common.rs +++ b/codex-rs/core/src/client_common.rs @@ -281,7 +281,7 @@ pub(crate) struct ResponsesApiRequest<'a> { } pub(crate) mod tools { - use crate::openai_tools::JsonSchema; + use crate::tools::spec::JsonSchema; use serde::Deserialize; use serde::Serialize; diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 2ce09970750..133083cf2fb 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -8,6 +8,7 @@ use crate::AuthManager; use crate::client_common::REVIEW_PROMPT; use crate::event_mapping::map_response_item_to_event_messages; use crate::function_tool::FunctionCallError; +use crate::parse_command::parse_command; use crate::review_format::format_review_findings_block; use crate::terminal; use crate::user_notification::UserNotifier; @@ -56,22 +57,14 @@ use crate::conversation_history::ConversationHistory; use crate::environment_context::EnvironmentContext; use crate::error::CodexErr; use crate::error::Result as CodexResult; -use crate::exec::ExecToolCallOutput; #[cfg(test)] use crate::exec::StreamOutput; -use crate::exec_command::ExecCommandParams; -use crate::exec_command::ExecSessionManager; -use crate::exec_command::WriteStdinParams; -use crate::executor::Executor; -use crate::executor::ExecutorConfig; -use crate::executor::normalize_exec_result; +// Removed: legacy executor wiring replaced by ToolOrchestrator flows. 
+// legacy normalize_exec_result no longer used after orchestrator migration use crate::mcp::auth::compute_auth_statuses; use crate::mcp_connection_manager::McpConnectionManager; use crate::model_family::find_family_for_model; use crate::openai_model_info::get_model_info; -use crate::openai_tools::ToolsConfig; -use crate::openai_tools::ToolsConfigParams; -use crate::parse_command::parse_command; use crate::project_doc::get_user_instructions; use crate::protocol::AgentMessageDeltaEvent; use crate::protocol::AgentReasoningDeltaEvent; @@ -84,13 +77,9 @@ use crate::protocol::ErrorEvent; use crate::protocol::Event; use crate::protocol::EventMsg; use crate::protocol::ExecApprovalRequestEvent; -use crate::protocol::ExecCommandBeginEvent; -use crate::protocol::ExecCommandEndEvent; use crate::protocol::InputItem; use crate::protocol::ListCustomPromptsResponseEvent; use crate::protocol::Op; -use crate::protocol::PatchApplyBeginEvent; -use crate::protocol::PatchApplyEndEvent; use crate::protocol::RateLimitSnapshot; use crate::protocol::ReviewDecision; use crate::protocol::ReviewOutputEvent; @@ -114,8 +103,10 @@ use crate::tasks::RegularTask; use crate::tasks::ReviewTask; use crate::tools::ToolRouter; use crate::tools::context::SharedTurnDiffTracker; -use crate::tools::format_exec_output_str; use crate::tools::parallel::ToolCallRuntime; +use crate::tools::sandboxing::ApprovalStore; +use crate::tools::spec::ToolsConfig; +use crate::tools::spec::ToolsConfigParams; use crate::turn_diff_tracker::TurnDiffTracker; use crate::unified_exec::UnifiedExecSessionManager; use crate::user_instructions::UserInstructions; @@ -272,6 +263,7 @@ pub(crate) struct TurnContext { pub(crate) tools_config: ToolsConfig, pub(crate) is_review_mode: bool, pub(crate) final_output_json_schema: Option, + pub(crate) codex_linux_sandbox_exe: Option, } impl TurnContext { @@ -404,6 +396,7 @@ impl Session { tools_config, is_review_mode: false, final_output_json_schema: None, + codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(), } } @@ -561,19 +554,14 @@ impl Session { let services = SessionServices { mcp_connection_manager, - session_manager: ExecSessionManager::default(), unified_exec_manager: UnifiedExecSessionManager::default(), notifier: UserNotifier::new(config.notify.clone()), rollout: Mutex::new(Some(rollout_recorder)), user_shell: default_shell, show_raw_agent_reasoning: config.show_raw_agent_reasoning, - executor: Executor::new(ExecutorConfig::new( - session_configuration.sandbox_policy.clone(), - session_configuration.cwd.clone(), - config.codex_linux_sandbox_exe.clone(), - )), auth_manager: Arc::clone(&auth_manager), otel_event_manager, + tool_approvals: Mutex::new(ApprovalStore::default()), }; let sess = Arc::new(Session { @@ -655,13 +643,12 @@ impl Session { } pub(crate) async fn new_turn(&self, updates: SessionSettingsUpdate) -> Arc { - let current_configuration = self.state.lock().await.session_configuration.clone(); - let session_configuration = current_configuration.apply(&updates); - - self.services.executor.update_environment( - session_configuration.sandbox_policy.clone(), - session_configuration.cwd.clone(), - ); + let session_configuration = { + let mut state = self.state.lock().await; + let session_configuration = state.session_configuration.clone().apply(&updates); + state.session_configuration = session_configuration.clone(); + session_configuration + }; let mut turn_context: TurnContext = Self::make_turn_context( Some(Arc::clone(&self.services.auth_manager)), @@ -965,168 +952,6 @@ impl Session { } } 
- async fn on_exec_command_begin( - &self, - turn_diff_tracker: SharedTurnDiffTracker, - exec_command_context: ExecCommandContext, - ) { - let ExecCommandContext { - sub_id, - call_id, - command_for_display, - cwd, - apply_patch, - .. - } = exec_command_context; - let msg = match apply_patch { - Some(ApplyPatchCommandContext { - user_explicitly_approved_this_action, - changes, - }) => { - { - let mut tracker = turn_diff_tracker.lock().await; - tracker.on_patch_begin(&changes); - } - - EventMsg::PatchApplyBegin(PatchApplyBeginEvent { - call_id, - auto_approved: !user_explicitly_approved_this_action, - changes, - }) - } - None => EventMsg::ExecCommandBegin(ExecCommandBeginEvent { - call_id, - command: command_for_display.clone(), - cwd, - parsed_cmd: parse_command(&command_for_display), - }), - }; - let event = Event { - id: sub_id.to_string(), - msg, - }; - self.send_event(event).await; - } - - async fn on_exec_command_end( - &self, - turn_diff_tracker: SharedTurnDiffTracker, - sub_id: &str, - call_id: &str, - output: &ExecToolCallOutput, - is_apply_patch: bool, - ) { - let ExecToolCallOutput { - stdout, - stderr, - aggregated_output, - duration, - exit_code, - timed_out: _, - .. - } = output; - // Send full stdout/stderr to clients; do not truncate. - let stdout = stdout.text.clone(); - let stderr = stderr.text.clone(); - let formatted_output = format_exec_output_str(output); - let aggregated_output: String = aggregated_output.text.clone(); - - let msg = if is_apply_patch { - EventMsg::PatchApplyEnd(PatchApplyEndEvent { - call_id: call_id.to_string(), - stdout, - stderr, - success: *exit_code == 0, - }) - } else { - EventMsg::ExecCommandEnd(ExecCommandEndEvent { - call_id: call_id.to_string(), - stdout, - stderr, - aggregated_output, - exit_code: *exit_code, - duration: *duration, - formatted_output, - }) - }; - - let event = Event { - id: sub_id.to_string(), - msg, - }; - self.send_event(event).await; - - // If this is an apply_patch, after we emit the end patch, emit a second event - // with the full turn diff if there is one. - if is_apply_patch { - let unified_diff = { - let mut tracker = turn_diff_tracker.lock().await; - tracker.get_unified_diff() - }; - if let Ok(Some(unified_diff)) = unified_diff { - let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff }); - let event = Event { - id: sub_id.into(), - msg, - }; - self.send_event(event).await; - } - } - } - /// Runs the exec tool call and emits events for the begin and end of the - /// command even on error. - /// - /// Returns the output of the exec tool call. 
- pub(crate) async fn run_exec_with_events( - &self, - turn_diff_tracker: SharedTurnDiffTracker, - prepared: PreparedExec, - approval_policy: AskForApproval, - ) -> Result { - let PreparedExec { context, request } = prepared; - let is_apply_patch = context.apply_patch.is_some(); - let sub_id = context.sub_id.clone(); - let call_id = context.call_id.clone(); - - let begin_turn_diff = turn_diff_tracker.clone(); - let begin_context = context.clone(); - let session = self; - let result = self - .services - .executor - .run(request, self, approval_policy, &context, move || { - let turn_diff = begin_turn_diff.clone(); - let ctx = begin_context.clone(); - async move { - session.on_exec_command_begin(turn_diff, ctx).await; - } - }) - .await; - - if matches!( - &result, - Err(ExecError::Function(FunctionCallError::Denied(_))) - ) { - return result; - } - - let normalized = normalize_exec_result(&result); - let borrowed = normalized.event_output(); - - self.on_exec_command_end( - turn_diff_tracker, - &sub_id, - &call_id, - borrowed, - is_apply_patch, - ) - .await; - - drop(normalized); - - result - } - /// Helper that emits a BackgroundEvent with the given message. This keeps /// the call‑sites terse so adding more diagnostics does not clutter the /// core agent logic. @@ -1235,43 +1060,6 @@ impl Session { .parse_tool_name(tool_name) } - pub(crate) async fn handle_exec_command_tool( - &self, - params: ExecCommandParams, - ) -> Result { - let result = self - .services - .session_manager - .handle_exec_command_request(params) - .await; - match result { - Ok(output) => Ok(output.to_text_output()), - Err(err) => Err(FunctionCallError::RespondToModel(err)), - } - } - - pub(crate) async fn handle_write_stdin_tool( - &self, - params: WriteStdinParams, - ) -> Result { - self.services - .session_manager - .handle_write_stdin_request(params) - .await - .map(|output| output.to_text_output()) - .map_err(FunctionCallError::RespondToModel) - } - - pub(crate) async fn run_unified_exec_request( - &self, - request: crate::unified_exec::UnifiedExecRequest<'_>, - ) -> Result { - self.services - .unified_exec_manager - .handle_request(request) - .await - } - pub async fn interrupt_task(self: &Arc) { info!("interrupt received: abort current task, if any"); self.abort_all_tasks(TurnAbortReason::Interrupted).await; @@ -1633,6 +1421,7 @@ async fn spawn_review_thread( cwd: parent_turn_context.cwd.clone(), is_review_mode: true, final_output_json_schema: None, + codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(), }; // Seed the child task with the review prompt as the initial user message. 
@@ -2511,10 +2300,6 @@ fn is_mcp_client_auth_required_error(error: &anyhow::Error) -> bool { error.to_string().contains("Auth required") } -use crate::executor::errors::ExecError; -use crate::executor::linkers::PreparedExec; -use crate::tools::context::ApplyPatchCommandContext; -use crate::tools::context::ExecCommandContext; #[cfg(test)] pub(crate) use tests::make_session_and_context; @@ -2523,6 +2308,8 @@ mod tests { use super::*; use crate::config::ConfigOverrides; use crate::config::ConfigToml; + use crate::exec::ExecToolCallOutput; + use crate::tools::format_exec_output_str; use crate::protocol::CompactedItem; use crate::protocol::InitialHistory; @@ -2827,19 +2614,14 @@ mod tests { let services = SessionServices { mcp_connection_manager: McpConnectionManager::default(), - session_manager: ExecSessionManager::default(), unified_exec_manager: UnifiedExecSessionManager::default(), notifier: UserNotifier::new(None), rollout: Mutex::new(None), user_shell: shell::Shell::Unknown, show_raw_agent_reasoning: config.show_raw_agent_reasoning, - executor: Executor::new(ExecutorConfig::new( - session_configuration.sandbox_policy.clone(), - session_configuration.cwd.clone(), - None, - )), auth_manager: Arc::clone(&auth_manager), otel_event_manager: otel_event_manager.clone(), + tool_approvals: Mutex::new(ApprovalStore::default()), }; let session = Session { @@ -2898,19 +2680,14 @@ mod tests { let services = SessionServices { mcp_connection_manager: McpConnectionManager::default(), - session_manager: ExecSessionManager::default(), unified_exec_manager: UnifiedExecSessionManager::default(), notifier: UserNotifier::new(None), rollout: Mutex::new(None), user_shell: shell::Shell::Unknown, show_raw_agent_reasoning: config.show_raw_agent_reasoning, - executor: Executor::new(ExecutorConfig::new( - session_configuration.sandbox_policy.clone(), - session_configuration.cwd.clone(), - None, - )), auth_manager: Arc::clone(&auth_manager), otel_event_manager: otel_event_manager.clone(), + tool_approvals: Mutex::new(ApprovalStore::default()), }; let session = Arc::new(Session { @@ -3258,6 +3035,7 @@ mod tests { env: HashMap::new(), with_escalated_permissions: Some(true), justification: Some("test".to_string()), + arg0: None, }; let params2 = ExecParams { diff --git a/codex-rs/core/src/command_safety/is_safe_command.rs b/codex-rs/core/src/command_safety/is_safe_command.rs index c1273caa94d..f0767449068 100644 --- a/codex-rs/core/src/command_safety/is_safe_command.rs +++ b/codex-rs/core/src/command_safety/is_safe_command.rs @@ -1,15 +1,25 @@ use crate::bash::parse_bash_lc_plain_commands; pub fn is_known_safe_command(command: &[String]) -> bool { + let command: Vec = command + .iter() + .map(|s| { + if s == "zsh" { + "bash".to_string() + } else { + s.clone() + } + }) + .collect(); #[cfg(target_os = "windows")] { use super::windows_safe_commands::is_safe_command_windows; - if is_safe_command_windows(command) { + if is_safe_command_windows(&command) { return true; } } - if is_safe_to_call_with_exec(command) { + if is_safe_to_call_with_exec(&command) { return true; } @@ -19,7 +29,7 @@ pub fn is_known_safe_command(command: &[String]) -> bool { // introduce side effects ( "&&", "||", ";", and "|" ). If every // individual command in the script is itself a known‑safe command, then // the composite expression is considered safe. 
- if let Some(all_commands) = parse_bash_lc_plain_commands(command) + if let Some(all_commands) = parse_bash_lc_plain_commands(&command) && !all_commands.is_empty() && all_commands .iter() @@ -31,9 +41,14 @@ pub fn is_known_safe_command(command: &[String]) -> bool { } fn is_safe_to_call_with_exec(command: &[String]) -> bool { - let cmd0 = command.first().map(String::as_str); + let Some(cmd0) = command.first().map(String::as_str) else { + return false; + }; - match cmd0 { + match std::path::Path::new(&cmd0) + .file_name() + .and_then(|osstr| osstr.to_str()) + { #[rustfmt::skip] Some( "cat" | @@ -103,13 +118,12 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool { // Rust Some("cargo") if command.get(1).map(String::as_str) == Some("check") => true, - // Special-case `sed -n {N|M,N}p FILE` + // Special-case `sed -n {N|M,N}p` Some("sed") if { - command.len() == 4 + command.len() <= 4 && command.get(1).map(String::as_str) == Some("-n") && is_valid_sed_n_arg(command.get(2).map(String::as_str)) - && command.get(3).map(String::is_empty) == Some(false) } => { true diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index 22d1deeb1c7..d2fa9735d06 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -18,13 +18,14 @@ use tokio::process::Child; use crate::error::CodexErr; use crate::error::Result; use crate::error::SandboxErr; -use crate::landlock::spawn_command_under_linux_sandbox; use crate::protocol::Event; use crate::protocol::EventMsg; use crate::protocol::ExecCommandOutputDeltaEvent; use crate::protocol::ExecOutputStream; use crate::protocol::SandboxPolicy; -use crate::seatbelt::spawn_command_under_seatbelt; +use crate::sandboxing::CommandSpec; +use crate::sandboxing::ExecEnv; +use crate::sandboxing::SandboxManager; use crate::spawn::StdioPolicy; use crate::spawn::spawn_child_async; @@ -53,6 +54,7 @@ pub struct ExecParams { pub env: HashMap, pub with_escalated_permissions: Option, pub justification: Option, + pub arg0: Option, } impl ExecParams { @@ -87,57 +89,85 @@ pub async fn process_exec_tool_call( codex_linux_sandbox_exe: &Option, stdout_stream: Option, ) -> Result { - let start = Instant::now(); + let ExecParams { + command, + cwd, + timeout_ms, + env, + with_escalated_permissions, + justification, + arg0: _, + } = params; - let timeout_duration = params.timeout_duration(); + let (program, args) = command.split_first().ok_or_else(|| { + CodexErr::Io(io::Error::new( + io::ErrorKind::InvalidInput, + "command args are empty", + )) + })?; - let raw_output_result: std::result::Result = match sandbox_type - { - SandboxType::None => exec(params, sandbox_policy, stdout_stream.clone()).await, - SandboxType::MacosSeatbelt => { - let ExecParams { - command, - cwd: command_cwd, - env, - .. - } = params; - let child = spawn_command_under_seatbelt( - command, - command_cwd, - sandbox_policy, - sandbox_cwd, - StdioPolicy::RedirectForShellTool, - env, - ) - .await?; - consume_truncated_output(child, timeout_duration, stdout_stream.clone()).await - } - SandboxType::LinuxSeccomp => { - let ExecParams { - command, - cwd: command_cwd, - env, - .. 
- } = params; - - let codex_linux_sandbox_exe = codex_linux_sandbox_exe - .as_ref() - .ok_or(CodexErr::LandlockSandboxExecutableNotProvided)?; - let child = spawn_command_under_linux_sandbox( - codex_linux_sandbox_exe, - command, - command_cwd, - sandbox_policy, - sandbox_cwd, - StdioPolicy::RedirectForShellTool, - env, - ) - .await?; - - consume_truncated_output(child, timeout_duration, stdout_stream).await - } + let spec = CommandSpec { + program: program.clone(), + args: args.to_vec(), + cwd, + env, + timeout_ms, + with_escalated_permissions, + justification, }; + + let manager = SandboxManager::new(); + let exec_env = manager + .transform( + &spec, + sandbox_policy, + sandbox_type, + sandbox_cwd, + codex_linux_sandbox_exe.as_ref(), + ) + .map_err(CodexErr::from)?; + + // Route through the sandboxing module for a single, unified execution path. + crate::sandboxing::execute_env(&exec_env, sandbox_policy, stdout_stream).await +} + +pub(crate) async fn execute_exec_env( + env: ExecEnv, + sandbox_policy: &SandboxPolicy, + stdout_stream: Option, +) -> Result { + let ExecEnv { + command, + cwd, + env, + timeout_ms, + sandbox, + with_escalated_permissions, + justification, + arg0, + } = env; + + let params = ExecParams { + command, + cwd, + timeout_ms, + env, + with_escalated_permissions, + justification, + arg0, + }; + + let start = Instant::now(); + let raw_output_result = exec(params, sandbox_policy, stdout_stream).await; let duration = start.elapsed(); + finalize_exec_result(raw_output_result, sandbox, duration) +} + +fn finalize_exec_result( + raw_output_result: std::result::Result, + sandbox_type: SandboxType, + duration: Duration, +) -> Result { match raw_output_result { Ok(raw_output) => { #[allow(unused_mut)] @@ -192,12 +222,30 @@ pub async fn process_exec_tool_call( } } +pub(crate) mod errors { + use super::CodexErr; + use crate::sandboxing::SandboxTransformError; + + impl From for CodexErr { + fn from(err: SandboxTransformError) -> Self { + match err { + SandboxTransformError::MissingLinuxSandboxExecutable => { + CodexErr::LandlockSandboxExecutableNotProvided + } + } + } + } +} + /// We don't have a fully deterministic way to tell if our command failed /// because of the sandbox - a command in the user's zshrc file might hit an /// error, but the command itself might fail or succeed for other reasons. /// For now, we conservatively check for well known command failure exit codes and /// also look for common sandbox denial keywords in the command output. 
-fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCallOutput) -> bool { +pub(crate) fn is_likely_sandbox_denied( + sandbox_type: SandboxType, + exec_output: &ExecToolCallOutput, +) -> bool { if sandbox_type == SandboxType::None || exec_output.exit_code == 0 { return false; } @@ -206,21 +254,17 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCal // 2: misuse of shell builtins // 126: permission denied // 127: command not found - const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127]; - if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) { - return false; - } - - const SANDBOX_DENIED_KEYWORDS: [&str; 6] = [ + const SANDBOX_DENIED_KEYWORDS: [&str; 7] = [ "operation not permitted", "permission denied", "read-only file system", "seccomp", "sandbox", "landlock", + "failed to write file", ]; - if [ + let has_sandbox_keyword = [ &exec_output.stderr.text, &exec_output.stdout.text, &exec_output.aggregated_output.text, @@ -231,10 +275,17 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCal SANDBOX_DENIED_KEYWORDS .iter() .any(|needle| lower.contains(needle)) - }) { + }); + + if has_sandbox_keyword { return true; } + const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127]; + if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) { + return false; + } + #[cfg(unix)] { const SIGSYS_CODE: i32 = libc::SIGSYS; @@ -248,11 +299,12 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCal false } -#[derive(Debug)] -pub struct StreamOutput { +#[derive(Debug, Clone)] +pub struct StreamOutput { pub text: T, pub truncated_after_lines: Option, } + #[derive(Debug)] struct RawExecToolCallOutput { pub exit_status: ExitStatus, @@ -285,7 +337,7 @@ fn append_all(dst: &mut Vec, src: &[u8]) { dst.extend_from_slice(src); } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct ExecToolCallOutput { pub exit_code: i32, pub stdout: StreamOutput, @@ -302,7 +354,11 @@ async fn exec( ) -> Result { let timeout = params.timeout_duration(); let ExecParams { - command, cwd, env, .. + command, + cwd, + env, + arg0, + .. 
} = params; let (program, args) = command.split_first().ok_or_else(|| { @@ -311,11 +367,11 @@ async fn exec( "command args are empty", )) })?; - let arg0 = None; + let arg0_ref = arg0.as_deref(); let child = spawn_child_async( PathBuf::from(program), args.into(), - arg0, + arg0_ref, cwd, sandbox_policy, StdioPolicy::RedirectForShellTool, diff --git a/codex-rs/core/src/exec_command/exec_command_params.rs b/codex-rs/core/src/exec_command/exec_command_params.rs deleted file mode 100644 index 11a3fd45967..00000000000 --- a/codex-rs/core/src/exec_command/exec_command_params.rs +++ /dev/null @@ -1,57 +0,0 @@ -use serde::Deserialize; -use serde::Serialize; - -use crate::exec_command::session_id::SessionId; - -#[derive(Debug, Clone, Deserialize)] -pub struct ExecCommandParams { - pub(crate) cmd: String, - - #[serde(default = "default_yield_time")] - pub(crate) yield_time_ms: u64, - - #[serde(default = "max_output_tokens")] - pub(crate) max_output_tokens: u64, - - #[serde(default = "default_shell")] - pub(crate) shell: String, - - #[serde(default = "default_login")] - pub(crate) login: bool, -} - -fn default_yield_time() -> u64 { - 10_000 -} - -fn max_output_tokens() -> u64 { - 10_000 -} - -fn default_login() -> bool { - true -} - -fn default_shell() -> String { - "/bin/bash".to_string() -} - -#[derive(Debug, Deserialize, Serialize)] -pub struct WriteStdinParams { - pub(crate) session_id: SessionId, - pub(crate) chars: String, - - #[serde(default = "write_stdin_default_yield_time_ms")] - pub(crate) yield_time_ms: u64, - - #[serde(default = "write_stdin_default_max_output_tokens")] - pub(crate) max_output_tokens: u64, -} - -fn write_stdin_default_yield_time_ms() -> u64 { - 250 -} - -fn write_stdin_default_max_output_tokens() -> u64 { - 10_000 -} diff --git a/codex-rs/core/src/exec_command/exec_command_session.rs b/codex-rs/core/src/exec_command/exec_command_session.rs deleted file mode 100644 index 31b3c929b28..00000000000 --- a/codex-rs/core/src/exec_command/exec_command_session.rs +++ /dev/null @@ -1,96 +0,0 @@ -use std::sync::Mutex as StdMutex; - -use tokio::sync::broadcast; -use tokio::sync::mpsc; -use tokio::task::JoinHandle; - -#[derive(Debug)] -pub(crate) struct ExecCommandSession { - /// Queue for writing bytes to the process stdin (PTY master write side). - writer_tx: mpsc::Sender>, - /// Broadcast stream of output chunks read from the PTY. New subscribers - /// receive only chunks emitted after they subscribe. - output_tx: broadcast::Sender>, - - /// Child killer handle for termination on drop (can signal independently - /// of a thread blocked in `.wait()`). - killer: StdMutex>>, - - /// JoinHandle for the blocking PTY reader task. - reader_handle: StdMutex>>, - - /// JoinHandle for the stdin writer task. - writer_handle: StdMutex>>, - - /// JoinHandle for the child wait task. - wait_handle: StdMutex>>, - - /// Tracks whether the underlying process has exited. 
- exit_status: std::sync::Arc, -} - -impl ExecCommandSession { - pub(crate) fn new( - writer_tx: mpsc::Sender>, - output_tx: broadcast::Sender>, - killer: Box, - reader_handle: JoinHandle<()>, - writer_handle: JoinHandle<()>, - wait_handle: JoinHandle<()>, - exit_status: std::sync::Arc, - ) -> (Self, broadcast::Receiver>) { - let initial_output_rx = output_tx.subscribe(); - ( - Self { - writer_tx, - output_tx, - killer: StdMutex::new(Some(killer)), - reader_handle: StdMutex::new(Some(reader_handle)), - writer_handle: StdMutex::new(Some(writer_handle)), - wait_handle: StdMutex::new(Some(wait_handle)), - exit_status, - }, - initial_output_rx, - ) - } - - pub(crate) fn writer_sender(&self) -> mpsc::Sender> { - self.writer_tx.clone() - } - - pub(crate) fn output_receiver(&self) -> broadcast::Receiver> { - self.output_tx.subscribe() - } - - pub(crate) fn has_exited(&self) -> bool { - self.exit_status.load(std::sync::atomic::Ordering::SeqCst) - } -} - -impl Drop for ExecCommandSession { - fn drop(&mut self) { - // Best-effort: terminate child first so blocking tasks can complete. - if let Ok(mut killer_opt) = self.killer.lock() - && let Some(mut killer) = killer_opt.take() - { - let _ = killer.kill(); - } - - // Abort background tasks; they may already have exited after kill. - if let Ok(mut h) = self.reader_handle.lock() - && let Some(handle) = h.take() - { - handle.abort(); - } - if let Ok(mut h) = self.writer_handle.lock() - && let Some(handle) = h.take() - { - handle.abort(); - } - if let Ok(mut h) = self.wait_handle.lock() - && let Some(handle) = h.take() - { - handle.abort(); - } - } -} diff --git a/codex-rs/core/src/exec_command/mod.rs b/codex-rs/core/src/exec_command/mod.rs deleted file mode 100644 index 2cfd022510a..00000000000 --- a/codex-rs/core/src/exec_command/mod.rs +++ /dev/null @@ -1,14 +0,0 @@ -mod exec_command_params; -mod exec_command_session; -mod responses_api; -mod session_id; -mod session_manager; - -pub use exec_command_params::ExecCommandParams; -pub use exec_command_params::WriteStdinParams; -pub(crate) use exec_command_session::ExecCommandSession; -pub use responses_api::EXEC_COMMAND_TOOL_NAME; -pub use responses_api::WRITE_STDIN_TOOL_NAME; -pub use responses_api::create_exec_command_tool_for_responses_api; -pub use responses_api::create_write_stdin_tool_for_responses_api; -pub use session_manager::SessionManager as ExecSessionManager; diff --git a/codex-rs/core/src/exec_command/responses_api.rs b/codex-rs/core/src/exec_command/responses_api.rs deleted file mode 100644 index 24f6d35c576..00000000000 --- a/codex-rs/core/src/exec_command/responses_api.rs +++ /dev/null @@ -1,98 +0,0 @@ -use std::collections::BTreeMap; - -use crate::client_common::tools::ResponsesApiTool; -use crate::openai_tools::JsonSchema; - -pub const EXEC_COMMAND_TOOL_NAME: &str = "exec_command"; -pub const WRITE_STDIN_TOOL_NAME: &str = "write_stdin"; - -pub fn create_exec_command_tool_for_responses_api() -> ResponsesApiTool { - let mut properties = BTreeMap::::new(); - properties.insert( - "cmd".to_string(), - JsonSchema::String { - description: Some("The shell command to execute.".to_string()), - }, - ); - properties.insert( - "yield_time_ms".to_string(), - JsonSchema::Number { - description: Some("The maximum time in milliseconds to wait for output.".to_string()), - }, - ); - properties.insert( - "max_output_tokens".to_string(), - JsonSchema::Number { - description: Some("The maximum number of tokens to output.".to_string()), - }, - ); - properties.insert( - "shell".to_string(), - 
JsonSchema::String { - description: Some("The shell to use. Defaults to \"/bin/bash\".".to_string()), - }, - ); - properties.insert( - "login".to_string(), - JsonSchema::Boolean { - description: Some( - "Whether to run the command as a login shell. Defaults to true.".to_string(), - ), - }, - ); - - ResponsesApiTool { - name: EXEC_COMMAND_TOOL_NAME.to_owned(), - description: r#"Execute shell commands on the local machine with streaming output."# - .to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: Some(vec!["cmd".to_string()]), - additional_properties: Some(false.into()), - }, - } -} - -pub fn create_write_stdin_tool_for_responses_api() -> ResponsesApiTool { - let mut properties = BTreeMap::::new(); - properties.insert( - "session_id".to_string(), - JsonSchema::Number { - description: Some("The ID of the exec_command session.".to_string()), - }, - ); - properties.insert( - "chars".to_string(), - JsonSchema::String { - description: Some("The characters to write to stdin.".to_string()), - }, - ); - properties.insert( - "yield_time_ms".to_string(), - JsonSchema::Number { - description: Some( - "The maximum time in milliseconds to wait for output after writing.".to_string(), - ), - }, - ); - properties.insert( - "max_output_tokens".to_string(), - JsonSchema::Number { - description: Some("The maximum number of tokens to output.".to_string()), - }, - ); - - ResponsesApiTool { - name: WRITE_STDIN_TOOL_NAME.to_owned(), - description: r#"Write characters to an exec session's stdin. Returns all stdout+stderr received within yield_time_ms. -Can write control characters (\u0003 for Ctrl-C), or an empty string to just poll stdout+stderr."# - .to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: Some(vec!["session_id".to_string(), "chars".to_string()]), - additional_properties: Some(false.into()), - }, - } -} diff --git a/codex-rs/core/src/exec_command/session_id.rs b/codex-rs/core/src/exec_command/session_id.rs deleted file mode 100644 index c97c5d54408..00000000000 --- a/codex-rs/core/src/exec_command/session_id.rs +++ /dev/null @@ -1,5 +0,0 @@ -use serde::Deserialize; -use serde::Serialize; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub(crate) struct SessionId(pub u32); diff --git a/codex-rs/core/src/exec_command/session_manager.rs b/codex-rs/core/src/exec_command/session_manager.rs deleted file mode 100644 index cd1c5329a2e..00000000000 --- a/codex-rs/core/src/exec_command/session_manager.rs +++ /dev/null @@ -1,506 +0,0 @@ -use std::collections::HashMap; -use std::io::ErrorKind; -use std::io::Read; -use std::sync::Arc; -use std::sync::Mutex as StdMutex; -use std::sync::atomic::AtomicBool; -use std::sync::atomic::AtomicU32; - -use portable_pty::CommandBuilder; -use portable_pty::PtySize; -use portable_pty::native_pty_system; -use tokio::sync::Mutex; -use tokio::sync::mpsc; -use tokio::sync::oneshot; -use tokio::time::Duration; -use tokio::time::Instant; -use tokio::time::timeout; - -use crate::exec_command::exec_command_params::ExecCommandParams; -use crate::exec_command::exec_command_params::WriteStdinParams; -use crate::exec_command::exec_command_session::ExecCommandSession; -use crate::exec_command::session_id::SessionId; -use crate::truncate::truncate_middle; - -#[derive(Debug, Default)] -pub struct SessionManager { - next_session_id: AtomicU32, - sessions: Mutex>, -} - -#[derive(Debug)] -pub struct ExecCommandOutput { - wall_time: Duration, - exit_status: ExitStatus, - 
original_token_count: Option, - output: String, -} - -impl ExecCommandOutput { - pub(crate) fn to_text_output(&self) -> String { - let wall_time_secs = self.wall_time.as_secs_f32(); - let termination_status = match self.exit_status { - ExitStatus::Exited(code) => format!("Process exited with code {code}"), - ExitStatus::Ongoing(session_id) => { - format!("Process running with session ID {}", session_id.0) - } - }; - let truncation_status = match self.original_token_count { - Some(tokens) => { - format!("\nWarning: truncated output (original token count: {tokens})") - } - None => "".to_string(), - }; - format!( - r#"Wall time: {wall_time_secs:.3} seconds -{termination_status}{truncation_status} -Output: -{output}"#, - output = self.output - ) - } -} - -#[derive(Debug)] -pub enum ExitStatus { - Exited(i32), - Ongoing(SessionId), -} - -impl SessionManager { - /// Processes the request and is required to send a response via `outgoing`. - pub async fn handle_exec_command_request( - &self, - params: ExecCommandParams, - ) -> Result { - // Allocate a session id. - let session_id = SessionId( - self.next_session_id - .fetch_add(1, std::sync::atomic::Ordering::SeqCst), - ); - - let (session, mut output_rx, mut exit_rx) = create_exec_command_session(params.clone()) - .await - .map_err(|err| { - format!( - "failed to create exec command session for session id {}: {err}", - session_id.0 - ) - })?; - - // Insert into session map. - self.sessions.lock().await.insert(session_id, session); - - // Collect output until either timeout expires or process exits. - // Do not cap during collection; truncate at the end if needed. - // Use a modest initial capacity to avoid large preallocation. - let cap_bytes_u64 = params.max_output_tokens.saturating_mul(4); - let cap_bytes: usize = cap_bytes_u64.min(usize::MAX as u64) as usize; - let mut collected: Vec = Vec::with_capacity(4096); - - let start_time = Instant::now(); - let deadline = start_time + Duration::from_millis(params.yield_time_ms); - let mut exit_code: Option = None; - - loop { - if Instant::now() >= deadline { - break; - } - let remaining = deadline.saturating_duration_since(Instant::now()); - tokio::select! { - biased; - exit = &mut exit_rx => { - exit_code = exit.ok(); - // Small grace period to pull remaining buffered output - let grace_deadline = Instant::now() + Duration::from_millis(25); - while Instant::now() < grace_deadline { - match timeout(Duration::from_millis(1), output_rx.recv()).await { - Ok(Ok(chunk)) => { - collected.extend_from_slice(&chunk); - } - Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => { - // Skip missed messages; keep trying within grace period. - continue; - } - Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => break, - Err(_) => break, - } - } - break; - } - chunk = timeout(remaining, output_rx.recv()) => { - match chunk { - Ok(Ok(chunk)) => { - collected.extend_from_slice(&chunk); - } - Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => { - // Skip missed messages; continue collecting fresh output. - } - Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => { break; } - Err(_) => { break; } - } - } - } - } - - let output = String::from_utf8_lossy(&collected).to_string(); - - let exit_status = if let Some(code) = exit_code { - ExitStatus::Exited(code) - } else { - ExitStatus::Ongoing(session_id) - }; - - // If output exceeds cap, truncate the middle and record original token estimate. 
- let (output, original_token_count) = truncate_middle(&output, cap_bytes); - Ok(ExecCommandOutput { - wall_time: Instant::now().duration_since(start_time), - exit_status, - original_token_count, - output, - }) - } - - /// Write characters to a session's stdin and collect combined output for up to `yield_time_ms`. - pub async fn handle_write_stdin_request( - &self, - params: WriteStdinParams, - ) -> Result { - let WriteStdinParams { - session_id, - chars, - yield_time_ms, - max_output_tokens, - } = params; - - // Grab handles without holding the sessions lock across await points. - let (writer_tx, mut output_rx) = { - let sessions = self.sessions.lock().await; - match sessions.get(&session_id) { - Some(session) => (session.writer_sender(), session.output_receiver()), - None => { - return Err(format!("unknown session id {}", session_id.0)); - } - } - }; - - // Write stdin if provided. - if !chars.is_empty() && writer_tx.send(chars.into_bytes()).await.is_err() { - return Err("failed to write to stdin".to_string()); - } - - // Collect output up to yield_time_ms, truncating to max_output_tokens bytes. - let mut collected: Vec = Vec::with_capacity(4096); - let start_time = Instant::now(); - let deadline = start_time + Duration::from_millis(yield_time_ms); - loop { - let now = Instant::now(); - if now >= deadline { - break; - } - let remaining = deadline - now; - match timeout(remaining, output_rx.recv()).await { - Ok(Ok(chunk)) => { - // Collect all output within the time budget; truncate at the end. - collected.extend_from_slice(&chunk); - } - Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => { - // Skip missed messages; continue collecting fresh output. - } - Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => break, - Err(_) => break, // timeout - } - } - - // Return structured output, truncating middle if over cap. - let output = String::from_utf8_lossy(&collected).to_string(); - let cap_bytes_u64 = max_output_tokens.saturating_mul(4); - let cap_bytes: usize = cap_bytes_u64.min(usize::MAX as u64) as usize; - let (output, original_token_count) = truncate_middle(&output, cap_bytes); - Ok(ExecCommandOutput { - wall_time: Instant::now().duration_since(start_time), - exit_status: ExitStatus::Ongoing(session_id), - original_token_count, - output, - }) - } -} - -/// Spawn PTY and child process per spawn_exec_command_session logic. -async fn create_exec_command_session( - params: ExecCommandParams, -) -> anyhow::Result<( - ExecCommandSession, - tokio::sync::broadcast::Receiver>, - oneshot::Receiver, -)> { - let ExecCommandParams { - cmd, - yield_time_ms: _, - max_output_tokens: _, - shell, - login, - } = params; - - // Use the native pty implementation for the system - let pty_system = native_pty_system(); - - // Create a new pty - let pair = pty_system.openpty(PtySize { - rows: 24, - cols: 80, - pixel_width: 0, - pixel_height: 0, - })?; - - // Spawn a shell into the pty - let mut command_builder = CommandBuilder::new(shell); - let shell_mode_opt = if login { "-lc" } else { "-c" }; - command_builder.arg(shell_mode_opt); - command_builder.arg(cmd); - - let mut child = pair.slave.spawn_command(command_builder)?; - // Obtain a killer that can signal the process independently of `.wait()`. - let killer = child.clone_killer(); - - // Channel to forward write requests to the PTY writer. - let (writer_tx, mut writer_rx) = mpsc::channel::>(128); - // Broadcast for streaming PTY output to readers: subscribers receive from subscription time. 
- let (output_tx, _) = tokio::sync::broadcast::channel::>(256); - // Reader task: drain PTY and forward chunks to output channel. - let mut reader = pair.master.try_clone_reader()?; - let output_tx_clone = output_tx.clone(); - let reader_handle = tokio::task::spawn_blocking(move || { - let mut buf = [0u8; 8192]; - loop { - match reader.read(&mut buf) { - Ok(0) => break, // EOF - Ok(n) => { - // Forward to broadcast; best-effort if there are subscribers. - let _ = output_tx_clone.send(buf[..n].to_vec()); - } - Err(ref e) if e.kind() == ErrorKind::Interrupted => { - // Retry on EINTR - continue; - } - Err(ref e) if e.kind() == ErrorKind::WouldBlock => { - // We're in a blocking thread; back off briefly and retry. - std::thread::sleep(Duration::from_millis(5)); - continue; - } - Err(_) => break, - } - } - }); - - // Writer task: apply stdin writes to the PTY writer. - let writer = pair.master.take_writer()?; - let writer = Arc::new(StdMutex::new(writer)); - let writer_handle = tokio::spawn({ - let writer = writer.clone(); - async move { - while let Some(bytes) = writer_rx.recv().await { - let writer = writer.clone(); - // Perform blocking write on a blocking thread. - let _ = tokio::task::spawn_blocking(move || { - if let Ok(mut guard) = writer.lock() { - use std::io::Write; - let _ = guard.write_all(&bytes); - let _ = guard.flush(); - } - }) - .await; - } - } - }); - - // Keep the child alive until it exits, then signal exit code. - let (exit_tx, exit_rx) = oneshot::channel::(); - let exit_status = Arc::new(AtomicBool::new(false)); - let wait_exit_status = exit_status.clone(); - let wait_handle = tokio::task::spawn_blocking(move || { - let code = match child.wait() { - Ok(status) => status.exit_code() as i32, - Err(_) => -1, - }; - wait_exit_status.store(true, std::sync::atomic::Ordering::SeqCst); - let _ = exit_tx.send(code); - }); - - // Create and store the session with channels. - let (session, initial_output_rx) = ExecCommandSession::new( - writer_tx, - output_tx, - killer, - reader_handle, - writer_handle, - wait_handle, - exit_status, - ); - Ok((session, initial_output_rx, exit_rx)) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::exec_command::session_id::SessionId; - - /// Test that verifies that [`SessionManager::handle_exec_command_request()`] - /// and [`SessionManager::handle_write_stdin_request()`] work as expected - /// in the presence of a process that never terminates (but produces - /// output continuously). - #[cfg(unix)] - #[allow(clippy::print_stderr)] - #[tokio::test(flavor = "multi_thread", worker_threads = 4)] - async fn session_manager_streams_and_truncates_from_now() { - use crate::exec_command::exec_command_params::ExecCommandParams; - use crate::exec_command::exec_command_params::WriteStdinParams; - use tokio::time::sleep; - - let session_manager = SessionManager::default(); - // Long-running loop that prints an increasing counter every ~100ms. - // Use Python for a portable, reliable sleep across shells/PTYs. - let cmd = r#"python3 - <<'PY' -import sys, time -count = 0 -while True: - print(count) - sys.stdout.flush() - count += 100 - time.sleep(0.1) -PY"# - .to_string(); - - // Start the session and collect ~3s of output. 
- let params = ExecCommandParams { - cmd, - yield_time_ms: 3_000, - max_output_tokens: 1_000, // large enough to avoid truncation here - shell: "/bin/bash".to_string(), - login: false, - }; - let initial_output = match session_manager - .handle_exec_command_request(params.clone()) - .await - { - Ok(v) => v, - Err(e) => { - // PTY may be restricted in some sandboxes; skip in that case. - if e.contains("openpty") || e.contains("Operation not permitted") { - eprintln!("skipping test due to restricted PTY: {e}"); - return; - } - panic!("exec request failed unexpectedly: {e}"); - } - }; - eprintln!("initial output: {initial_output:?}"); - - // Should be ongoing (we launched a never-ending loop). - let session_id = match initial_output.exit_status { - ExitStatus::Ongoing(id) => id, - _ => panic!("expected ongoing session"), - }; - - // Parse the numeric lines and get the max observed value in the first window. - let first_nums = extract_monotonic_numbers(&initial_output.output); - assert!( - !first_nums.is_empty(), - "expected some output from first window" - ); - let first_max = *first_nums.iter().max().unwrap(); - - // Wait ~4s so counters progress while we're not reading. - sleep(Duration::from_millis(4_000)).await; - - // Now read ~3s of output "from now" only. - // Use a small token cap so truncation occurs and we test middle truncation. - let write_params = WriteStdinParams { - session_id, - chars: String::new(), - yield_time_ms: 3_000, - max_output_tokens: 16, // 16 tokens ~= 64 bytes -> likely truncation - }; - let second = session_manager - .handle_write_stdin_request(write_params) - .await - .expect("write stdin should succeed"); - - // Verify truncation metadata and size bound (cap is tokens*4 bytes). - assert!(second.original_token_count.is_some()); - let cap_bytes = (16u64 * 4) as usize; - assert!(second.output.len() <= cap_bytes); - // New middle marker should be present. - assert!( - second.output.contains("tokens truncated") && second.output.contains('…'), - "expected truncation marker in output, got: {}", - second.output - ); - - // Minimal freshness check: the earliest number we see in the second window - // should be significantly larger than the last from the first window. - let second_nums = extract_monotonic_numbers(&second.output); - assert!( - !second_nums.is_empty(), - "expected some numeric output from second window" - ); - let second_min = *second_nums.iter().min().unwrap(); - - // We slept 4 seconds (~40 ticks at 100ms/tick, each +100), so expect - // an increase of roughly 4000 or more. Allow a generous margin. - assert!( - second_min >= first_max + 2000, - "second_min={second_min} first_max={first_max}", - ); - } - - #[cfg(unix)] - fn extract_monotonic_numbers(s: &str) -> Vec { - s.lines() - .filter_map(|line| { - if !line.is_empty() - && line.chars().all(|c| c.is_ascii_digit()) - && let Ok(n) = line.parse::() - { - // Our generator increments by 100; ignore spurious fragments. 
- if n % 100 == 0 { - return Some(n); - } - } - None - }) - .collect() - } - - #[test] - fn to_text_output_exited_no_truncation() { - let out = ExecCommandOutput { - wall_time: Duration::from_millis(1234), - exit_status: ExitStatus::Exited(0), - original_token_count: None, - output: "hello".to_string(), - }; - let text = out.to_text_output(); - let expected = r#"Wall time: 1.234 seconds -Process exited with code 0 -Output: -hello"#; - assert_eq!(expected, text); - } - - #[test] - fn to_text_output_ongoing_with_truncation() { - let out = ExecCommandOutput { - wall_time: Duration::from_millis(500), - exit_status: ExitStatus::Ongoing(SessionId(42)), - original_token_count: Some(1000), - output: "abc".to_string(), - }; - let text = out.to_text_output(); - let expected = r#"Wall time: 0.500 seconds -Process running with session ID 42 -Warning: truncated output (original token count: 1000) -Output: -abc"#; - assert_eq!(expected, text); - } -} diff --git a/codex-rs/core/src/executor/backends.rs b/codex-rs/core/src/executor/backends.rs deleted file mode 100644 index 9c65745c4fc..00000000000 --- a/codex-rs/core/src/executor/backends.rs +++ /dev/null @@ -1,109 +0,0 @@ -use std::collections::HashMap; -use std::env; - -use async_trait::async_trait; - -use crate::CODEX_APPLY_PATCH_ARG1; -use crate::apply_patch::ApplyPatchExec; -use crate::exec::ExecParams; -use crate::executor::ExecutorConfig; -use crate::function_tool::FunctionCallError; - -pub(crate) enum ExecutionMode { - Shell, - ApplyPatch(ApplyPatchExec), -} - -#[async_trait] -/// Backend-specific hooks that prepare and post-process execution requests for a -/// given [`ExecutionMode`]. -pub(crate) trait ExecutionBackend: Send + Sync { - fn prepare( - &self, - params: ExecParams, - // Required for downcasting the apply_patch. - mode: &ExecutionMode, - config: &ExecutorConfig, - ) -> Result; - - fn stream_stdout(&self, _mode: &ExecutionMode) -> bool { - true - } -} - -static SHELL_BACKEND: ShellBackend = ShellBackend; -static APPLY_PATCH_BACKEND: ApplyPatchBackend = ApplyPatchBackend; - -pub(crate) fn backend_for_mode(mode: &ExecutionMode) -> &'static dyn ExecutionBackend { - match mode { - ExecutionMode::Shell => &SHELL_BACKEND, - ExecutionMode::ApplyPatch(_) => &APPLY_PATCH_BACKEND, - } -} - -struct ShellBackend; - -#[async_trait] -impl ExecutionBackend for ShellBackend { - fn prepare( - &self, - params: ExecParams, - mode: &ExecutionMode, - _config: &ExecutorConfig, - ) -> Result { - match mode { - ExecutionMode::Shell => Ok(params), - _ => Err(FunctionCallError::RespondToModel( - "shell backend invoked with non-shell mode".to_string(), - )), - } - } -} - -struct ApplyPatchBackend; - -#[async_trait] -impl ExecutionBackend for ApplyPatchBackend { - fn prepare( - &self, - params: ExecParams, - mode: &ExecutionMode, - config: &ExecutorConfig, - ) -> Result { - match mode { - ExecutionMode::ApplyPatch(exec) => { - let path_to_codex = if let Some(exe_path) = &config.codex_exe { - exe_path.to_string_lossy().to_string() - } else { - env::current_exe() - .ok() - .map(|p| p.to_string_lossy().to_string()) - .ok_or_else(|| { - FunctionCallError::RespondToModel( - "failed to determine path to codex executable".to_string(), - ) - })? 
- }; - - let patch = exec.action.patch.clone(); - Ok(ExecParams { - command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch], - cwd: exec.action.cwd.clone(), - timeout_ms: params.timeout_ms, - // Run apply_patch with a minimal environment for determinism and to - // avoid leaking host environment variables into the patch process. - env: HashMap::new(), - with_escalated_permissions: params.with_escalated_permissions, - justification: params.justification, - }) - } - ExecutionMode::Shell => Err(FunctionCallError::RespondToModel( - "apply_patch backend invoked without patch context".to_string(), - )), - } - } - - fn stream_stdout(&self, _mode: &ExecutionMode) -> bool { - false - } -} diff --git a/codex-rs/core/src/executor/cache.rs b/codex-rs/core/src/executor/cache.rs deleted file mode 100644 index 737ecb927cb..00000000000 --- a/codex-rs/core/src/executor/cache.rs +++ /dev/null @@ -1,51 +0,0 @@ -use std::collections::HashSet; -use std::sync::Arc; -use std::sync::Mutex; - -#[derive(Clone, Debug, Default)] -/// Thread-safe store of user approvals so repeated commands can reuse -/// previously granted trust. -pub(crate) struct ApprovalCache { - inner: Arc>>>, -} - -impl ApprovalCache { - pub(crate) fn insert(&self, command: Vec) { - if command.is_empty() { - return; - } - if let Ok(mut guard) = self.inner.lock() { - guard.insert(command); - } - } - - pub(crate) fn snapshot(&self) -> HashSet> { - self.inner.lock().map(|g| g.clone()).unwrap_or_default() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn insert_ignores_empty_and_dedupes() { - let cache = ApprovalCache::default(); - - // Empty should be ignored - cache.insert(vec![]); - assert!(cache.snapshot().is_empty()); - - // Insert a command and verify snapshot contains it - let cmd = vec!["foo".to_string(), "bar".to_string()]; - cache.insert(cmd.clone()); - let snap1 = cache.snapshot(); - assert!(snap1.contains(&cmd)); - - // Reinserting should not create duplicates - cache.insert(cmd); - let snap2 = cache.snapshot(); - assert_eq!(snap1, snap2); - } -} diff --git a/codex-rs/core/src/executor/mod.rs b/codex-rs/core/src/executor/mod.rs deleted file mode 100644 index 3f1e02dff29..00000000000 --- a/codex-rs/core/src/executor/mod.rs +++ /dev/null @@ -1,68 +0,0 @@ -mod backends; -mod cache; -mod runner; -mod sandbox; - -pub(crate) use backends::ExecutionMode; -pub(crate) use runner::ExecutionRequest; -pub(crate) use runner::Executor; -pub(crate) use runner::ExecutorConfig; -pub(crate) use runner::normalize_exec_result; - -pub(crate) mod linkers { - use crate::exec::ExecParams; - use crate::exec::StdoutStream; - use crate::executor::backends::ExecutionMode; - use crate::executor::runner::ExecutionRequest; - use crate::tools::context::ExecCommandContext; - - pub struct PreparedExec { - pub(crate) context: ExecCommandContext, - pub(crate) request: ExecutionRequest, - } - - impl PreparedExec { - pub fn new( - context: ExecCommandContext, - params: ExecParams, - approval_command: Vec, - mode: ExecutionMode, - stdout_stream: Option, - use_shell_profile: bool, - ) -> Self { - let request = ExecutionRequest { - params, - approval_command, - mode, - stdout_stream, - use_shell_profile, - }; - - Self { context, request } - } - } -} - -pub mod errors { - use crate::error::CodexErr; - use crate::function_tool::FunctionCallError; - use thiserror::Error; - - #[derive(Debug, Error)] - pub enum ExecError { - #[error(transparent)] - Function(#[from] FunctionCallError), - #[error(transparent)] - 
Codex(#[from] CodexErr), - } - - impl ExecError { - pub(crate) fn rejection(msg: impl Into) -> Self { - FunctionCallError::RespondToModel(msg.into()).into() - } - - pub(crate) fn denied(msg: impl Into) -> Self { - FunctionCallError::Denied(msg.into()).into() - } - } -} diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs deleted file mode 100644 index 072debb5d3f..00000000000 --- a/codex-rs/core/src/executor/runner.rs +++ /dev/null @@ -1,433 +0,0 @@ -use std::future::Future; -use std::path::PathBuf; -use std::sync::Arc; -use std::sync::RwLock; -use std::time::Duration; - -use super::backends::ExecutionMode; -use super::backends::backend_for_mode; -use super::cache::ApprovalCache; -use crate::codex::Session; -use crate::error::CodexErr; -use crate::error::SandboxErr; -use crate::error::get_error_message_ui; -use crate::exec::ExecParams; -use crate::exec::ExecToolCallOutput; -use crate::exec::SandboxType; -use crate::exec::StdoutStream; -use crate::exec::StreamOutput; -use crate::exec::process_exec_tool_call; -use crate::executor::errors::ExecError; -use crate::executor::sandbox::select_sandbox; -use crate::function_tool::FunctionCallError; -use crate::protocol::AskForApproval; -use crate::protocol::ReviewDecision; -use crate::protocol::SandboxPolicy; -use crate::shell; -use crate::tools::context::ExecCommandContext; -use codex_otel::otel_event_manager::ToolDecisionSource; - -#[derive(Clone, Debug)] -pub(crate) struct ExecutorConfig { - pub(crate) sandbox_policy: SandboxPolicy, - pub(crate) sandbox_cwd: PathBuf, - pub(crate) codex_exe: Option, -} - -impl ExecutorConfig { - pub(crate) fn new( - sandbox_policy: SandboxPolicy, - sandbox_cwd: PathBuf, - codex_exe: Option, - ) -> Self { - Self { - sandbox_policy, - sandbox_cwd, - codex_exe, - } - } -} - -/// Coordinates sandbox selection, backend-specific preparation, and command -/// execution for tool calls requested by the model. -pub(crate) struct Executor { - approval_cache: ApprovalCache, - config: Arc>, -} - -impl Executor { - pub(crate) fn new(config: ExecutorConfig) -> Self { - Self { - approval_cache: ApprovalCache::default(), - config: Arc::new(RwLock::new(config)), - } - } - - /// Updates the sandbox policy and working directory used for future - /// executions without recreating the executor. - pub(crate) fn update_environment(&self, sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf) { - if let Ok(mut cfg) = self.config.write() { - cfg.sandbox_policy = sandbox_policy; - cfg.sandbox_cwd = sandbox_cwd; - } - } - - /// Runs a prepared execution request end-to-end: prepares parameters, decides on - /// sandbox placement (prompting the user when necessary), launches the command, - /// and lets the backend post-process the final output. - pub(crate) async fn run( - &self, - mut request: ExecutionRequest, - session: &Session, - approval_policy: AskForApproval, - context: &ExecCommandContext, - on_exec_begin: F, - ) -> Result - where - F: FnOnce() -> Fut, - Fut: Future, - { - if matches!(request.mode, ExecutionMode::Shell) { - request.params = - maybe_translate_shell_command(request.params, session, request.use_shell_profile); - } - - // Step 1: Snapshot sandbox configuration so it stays stable for this run. - let config = self - .config - .read() - .map_err(|_| ExecError::rejection("executor config poisoned"))? - .clone(); - - // Step 2: Normalise parameters via the selected backend. 
- let backend = backend_for_mode(&request.mode); - let stdout_stream = if backend.stream_stdout(&request.mode) { - request.stdout_stream.clone() - } else { - None - }; - request.params = backend - .prepare(request.params, &request.mode, &config) - .map_err(ExecError::from)?; - - // Step 3: Decide sandbox placement, prompting for approval when needed. - let sandbox_decision = select_sandbox( - &request, - approval_policy, - self.approval_cache.snapshot(), - &config, - session, - &context.sub_id, - &context.call_id, - &context.otel_event_manager, - ) - .await?; - if sandbox_decision.record_session_approval { - self.approval_cache.insert(request.approval_command.clone()); - } - on_exec_begin().await; - // Step 4: Launch the command within the chosen sandbox. - let first_attempt = self - .spawn( - request.params.clone(), - sandbox_decision.initial_sandbox, - &config, - stdout_stream.clone(), - ) - .await; - - // Step 5: Handle sandbox outcomes, optionally escalating to an unsandboxed retry. - match first_attempt { - Ok(output) => Ok(output), - Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => { - Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into()) - } - Err(CodexErr::Sandbox(error)) => { - if sandbox_decision.escalate_on_failure { - self.retry_without_sandbox( - &request, - &config, - session, - context, - stdout_stream, - error, - ) - .await - } else { - let message = sandbox_failure_message(error); - Err(ExecError::rejection(message)) - } - } - Err(err) => Err(err.into()), - } - } - - /// Fallback path invoked when a sandboxed run is denied so the user can - /// approve rerunning without isolation. - async fn retry_without_sandbox( - &self, - request: &ExecutionRequest, - config: &ExecutorConfig, - session: &Session, - context: &ExecCommandContext, - stdout_stream: Option, - sandbox_error: SandboxErr, - ) -> Result { - session - .notify_background_event( - &context.sub_id, - format!("Execution failed: {sandbox_error}"), - ) - .await; - let decision = session - .request_command_approval( - context.sub_id.to_string(), - context.call_id.to_string(), - request.approval_command.clone(), - request.params.cwd.clone(), - Some("command failed; retry without sandbox?".to_string()), - ) - .await; - - context.otel_event_manager.tool_decision( - &context.tool_name, - &context.call_id, - decision, - ToolDecisionSource::User, - ); - match decision { - ReviewDecision::Approved | ReviewDecision::ApprovedForSession => { - if matches!(decision, ReviewDecision::ApprovedForSession) { - self.approval_cache.insert(request.approval_command.clone()); - } - session - .notify_background_event(&context.sub_id, "retrying command without sandbox") - .await; - - let retry_output = self - .spawn( - request.params.clone(), - SandboxType::None, - config, - stdout_stream, - ) - .await?; - - Ok(retry_output) - } - ReviewDecision::Denied | ReviewDecision::Abort => { - Err(ExecError::denied("exec command rejected by user")) - } - } - } - - async fn spawn( - &self, - params: ExecParams, - sandbox: SandboxType, - config: &ExecutorConfig, - stdout_stream: Option, - ) -> Result { - process_exec_tool_call( - params, - sandbox, - &config.sandbox_policy, - &config.sandbox_cwd, - &config.codex_exe, - stdout_stream, - ) - .await - } -} - -fn maybe_translate_shell_command( - params: ExecParams, - session: &Session, - use_shell_profile: bool, -) -> ExecParams { - let should_translate = - matches!(session.user_shell(), shell::Shell::PowerShell(_)) || use_shell_profile; - - if should_translate - && let Some(command) = 
session - .user_shell() - .format_default_shell_invocation(params.command.clone()) - { - return ExecParams { command, ..params }; - } - - params -} - -fn sandbox_failure_message(error: SandboxErr) -> String { - let codex_error = CodexErr::Sandbox(error); - let friendly = get_error_message_ui(&codex_error); - format!("failed in sandbox: {friendly}") -} - -pub(crate) struct ExecutionRequest { - pub params: ExecParams, - pub approval_command: Vec, - pub mode: ExecutionMode, - pub stdout_stream: Option, - pub use_shell_profile: bool, -} - -pub(crate) struct NormalizedExecOutput<'a> { - borrowed: Option<&'a ExecToolCallOutput>, - synthetic: Option, -} - -impl<'a> NormalizedExecOutput<'a> { - pub(crate) fn event_output(&'a self) -> &'a ExecToolCallOutput { - match (self.borrowed, self.synthetic.as_ref()) { - (Some(output), _) => output, - (None, Some(output)) => output, - (None, None) => unreachable!("normalized exec output missing data"), - } - } -} - -/// Converts a raw execution result into a uniform view that always exposes an -/// [`ExecToolCallOutput`], synthesizing error output when the command fails -/// before producing a response. -pub(crate) fn normalize_exec_result( - result: &Result, -) -> NormalizedExecOutput<'_> { - match result { - Ok(output) => NormalizedExecOutput { - borrowed: Some(output), - synthetic: None, - }, - Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => { - NormalizedExecOutput { - borrowed: Some(output.as_ref()), - synthetic: None, - } - } - Err(err) => { - let message = match err { - ExecError::Function(FunctionCallError::RespondToModel(msg)) - | ExecError::Function(FunctionCallError::Denied(msg)) => msg.clone(), - ExecError::Codex(e) => get_error_message_ui(e), - err => err.to_string(), - }; - let synthetic = ExecToolCallOutput { - exit_code: -1, - stdout: StreamOutput::new(String::new()), - stderr: StreamOutput::new(message.clone()), - aggregated_output: StreamOutput::new(message), - duration: Duration::default(), - timed_out: false, - }; - NormalizedExecOutput { - borrowed: None, - synthetic: Some(synthetic), - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::error::CodexErr; - use crate::error::EnvVarError; - use crate::error::SandboxErr; - use crate::exec::StreamOutput; - use pretty_assertions::assert_eq; - - fn make_output(text: &str) -> ExecToolCallOutput { - ExecToolCallOutput { - exit_code: 1, - stdout: StreamOutput::new(String::new()), - stderr: StreamOutput::new(String::new()), - aggregated_output: StreamOutput::new(text.to_string()), - duration: Duration::from_millis(123), - timed_out: false, - } - } - - #[test] - fn normalize_success_borrows() { - let out = make_output("ok"); - let result: Result = Ok(out); - let normalized = normalize_exec_result(&result); - assert_eq!(normalized.event_output().aggregated_output.text, "ok"); - } - - #[test] - fn normalize_timeout_borrows_embedded_output() { - let out = make_output("timed out payload"); - let err = CodexErr::Sandbox(SandboxErr::Timeout { - output: Box::new(out), - }); - let result: Result = Err(ExecError::Codex(err)); - let normalized = normalize_exec_result(&result); - assert_eq!( - normalized.event_output().aggregated_output.text, - "timed out payload" - ); - } - - #[test] - fn sandbox_failure_message_uses_denied_stderr() { - let output = ExecToolCallOutput { - exit_code: 101, - stdout: StreamOutput::new(String::new()), - stderr: StreamOutput::new("sandbox stderr".to_string()), - aggregated_output: StreamOutput::new(String::new()), - duration: 
Duration::from_millis(10), - timed_out: false, - }; - let err = SandboxErr::Denied { - output: Box::new(output), - }; - let message = sandbox_failure_message(err); - assert_eq!(message, "failed in sandbox: sandbox stderr"); - } - - #[test] - fn sandbox_failure_message_falls_back_to_aggregated_output() { - let output = ExecToolCallOutput { - exit_code: 101, - stdout: StreamOutput::new(String::new()), - stderr: StreamOutput::new(String::new()), - aggregated_output: StreamOutput::new("aggregate text".to_string()), - duration: Duration::from_millis(10), - timed_out: false, - }; - let err = SandboxErr::Denied { - output: Box::new(output), - }; - let message = sandbox_failure_message(err); - assert_eq!(message, "failed in sandbox: aggregate text"); - } - - #[test] - fn normalize_function_error_synthesizes_payload() { - let err = FunctionCallError::RespondToModel("boom".to_string()); - let result: Result = Err(ExecError::Function(err)); - let normalized = normalize_exec_result(&result); - assert_eq!(normalized.event_output().aggregated_output.text, "boom"); - } - - #[test] - fn normalize_codex_error_synthesizes_user_message() { - // Use a simple EnvVar error which formats to a clear message - let e = CodexErr::EnvVar(EnvVarError { - var: "FOO".to_string(), - instructions: Some("set it".to_string()), - }); - let result: Result = Err(ExecError::Codex(e)); - let normalized = normalize_exec_result(&result); - assert!( - normalized - .event_output() - .aggregated_output - .text - .contains("Missing environment variable: `FOO`"), - "expected synthesized user-friendly message" - ); - } -} diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs deleted file mode 100644 index f0a421a29a0..00000000000 --- a/codex-rs/core/src/executor/sandbox.rs +++ /dev/null @@ -1,405 +0,0 @@ -use crate::apply_patch::ApplyPatchExec; -use crate::codex::Session; -use crate::exec::SandboxType; -use crate::executor::ExecutionMode; -use crate::executor::ExecutionRequest; -use crate::executor::ExecutorConfig; -use crate::executor::errors::ExecError; -use crate::safety::SafetyCheck; -use crate::safety::assess_command_safety; -use crate::safety::assess_patch_safety; -use codex_otel::otel_event_manager::OtelEventManager; -use codex_otel::otel_event_manager::ToolDecisionSource; -use codex_protocol::protocol::AskForApproval; -use codex_protocol::protocol::ReviewDecision; -use std::collections::HashSet; - -/// Sandbox placement options selected for an execution run, including whether -/// to escalate after failures and whether approvals should persist. -pub(crate) struct SandboxDecision { - pub(crate) initial_sandbox: SandboxType, - pub(crate) escalate_on_failure: bool, - pub(crate) record_session_approval: bool, -} - -impl SandboxDecision { - fn auto(sandbox: SandboxType, escalate_on_failure: bool) -> Self { - Self { - initial_sandbox: sandbox, - escalate_on_failure, - record_session_approval: false, - } - } - - fn user_override(record_session_approval: bool) -> Self { - Self { - initial_sandbox: SandboxType::None, - escalate_on_failure: false, - record_session_approval, - } - } -} - -fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool { - matches!( - (approval, sandbox), - ( - AskForApproval::UnlessTrusted | AskForApproval::OnFailure, - SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp - ) - ) -} - -/// Determines how a command should be sandboxed, prompting the user when -/// policy requires explicit approval. 
-#[allow(clippy::too_many_arguments)] -pub async fn select_sandbox( - request: &ExecutionRequest, - approval_policy: AskForApproval, - approval_cache: HashSet>, - config: &ExecutorConfig, - session: &Session, - sub_id: &str, - call_id: &str, - otel_event_manager: &OtelEventManager, -) -> Result { - match &request.mode { - ExecutionMode::Shell => { - select_shell_sandbox( - request, - approval_policy, - approval_cache, - config, - session, - sub_id, - call_id, - otel_event_manager, - ) - .await - } - ExecutionMode::ApplyPatch(exec) => { - select_apply_patch_sandbox(exec, approval_policy, config) - } - } -} - -#[allow(clippy::too_many_arguments)] -async fn select_shell_sandbox( - request: &ExecutionRequest, - approval_policy: AskForApproval, - approved_snapshot: HashSet>, - config: &ExecutorConfig, - session: &Session, - sub_id: &str, - call_id: &str, - otel_event_manager: &OtelEventManager, -) -> Result { - let command_for_safety = if request.approval_command.is_empty() { - request.params.command.clone() - } else { - request.approval_command.clone() - }; - - let safety = assess_command_safety( - &command_for_safety, - approval_policy, - &config.sandbox_policy, - &approved_snapshot, - request.params.with_escalated_permissions.unwrap_or(false), - ); - - match safety { - SafetyCheck::AutoApprove { - sandbox_type, - user_explicitly_approved, - } => { - let mut decision = SandboxDecision::auto( - sandbox_type, - should_escalate_on_failure(approval_policy, sandbox_type), - ); - if user_explicitly_approved { - decision.record_session_approval = true; - } - let (decision_for_event, source) = if user_explicitly_approved { - (ReviewDecision::ApprovedForSession, ToolDecisionSource::User) - } else { - (ReviewDecision::Approved, ToolDecisionSource::Config) - }; - otel_event_manager.tool_decision("local_shell", call_id, decision_for_event, source); - Ok(decision) - } - SafetyCheck::AskUser => { - let decision = session - .request_command_approval( - sub_id.to_string(), - call_id.to_string(), - request.approval_command.clone(), - request.params.cwd.clone(), - request.params.justification.clone(), - ) - .await; - - otel_event_manager.tool_decision( - "local_shell", - call_id, - decision, - ToolDecisionSource::User, - ); - match decision { - ReviewDecision::Approved => Ok(SandboxDecision::user_override(false)), - ReviewDecision::ApprovedForSession => Ok(SandboxDecision::user_override(true)), - ReviewDecision::Denied | ReviewDecision::Abort => { - Err(ExecError::denied("exec command rejected by user")) - } - } - } - SafetyCheck::Reject { reason } => Err(ExecError::rejection(format!( - "exec command rejected: {reason}" - ))), - } -} - -fn select_apply_patch_sandbox( - exec: &ApplyPatchExec, - approval_policy: AskForApproval, - config: &ExecutorConfig, -) -> Result { - if exec.user_explicitly_approved_this_action { - return Ok(SandboxDecision::user_override(false)); - } - - match assess_patch_safety( - &exec.action, - approval_policy, - &config.sandbox_policy, - &config.sandbox_cwd, - ) { - SafetyCheck::AutoApprove { sandbox_type, .. 
} => Ok(SandboxDecision::auto( - sandbox_type, - should_escalate_on_failure(approval_policy, sandbox_type), - )), - SafetyCheck::AskUser => Err(ExecError::rejection( - "patch requires approval but none was recorded", - )), - SafetyCheck::Reject { reason } => { - Err(ExecError::rejection(format!("patch rejected: {reason}"))) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::codex::make_session_and_context; - use crate::exec::ExecParams; - use crate::function_tool::FunctionCallError; - use crate::protocol::SandboxPolicy; - use codex_apply_patch::ApplyPatchAction; - use pretty_assertions::assert_eq; - - #[tokio::test] - async fn select_apply_patch_user_override_when_explicit() { - let (session, ctx) = make_session_and_context(); - let tmp = tempfile::tempdir().expect("tmp"); - let p = tmp.path().join("a.txt"); - let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string()); - let exec = ApplyPatchExec { - action, - user_explicitly_approved_this_action: true, - }; - let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None); - let request = ExecutionRequest { - params: ExecParams { - command: vec!["apply_patch".into()], - cwd: std::env::temp_dir(), - timeout_ms: None, - env: std::collections::HashMap::new(), - with_escalated_permissions: None, - justification: None, - }, - approval_command: vec!["apply_patch".into()], - mode: ExecutionMode::ApplyPatch(exec), - stdout_stream: None, - use_shell_profile: false, - }; - let otel_event_manager = ctx.client.get_otel_event_manager(); - let decision = select_sandbox( - &request, - AskForApproval::OnRequest, - Default::default(), - &cfg, - &session, - "sub", - "call", - &otel_event_manager, - ) - .await - .expect("ok"); - // Explicit user override runs without sandbox - assert_eq!(decision.initial_sandbox, SandboxType::None); - assert_eq!(decision.escalate_on_failure, false); - } - - #[tokio::test] - async fn select_apply_patch_autoapprove_in_danger() { - let (session, ctx) = make_session_and_context(); - let tmp = tempfile::tempdir().expect("tmp"); - let p = tmp.path().join("a.txt"); - let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string()); - let exec = ApplyPatchExec { - action, - user_explicitly_approved_this_action: false, - }; - let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None); - let request = ExecutionRequest { - params: ExecParams { - command: vec!["apply_patch".into()], - cwd: std::env::temp_dir(), - timeout_ms: None, - env: std::collections::HashMap::new(), - with_escalated_permissions: None, - justification: None, - }, - approval_command: vec!["apply_patch".into()], - mode: ExecutionMode::ApplyPatch(exec), - stdout_stream: None, - use_shell_profile: false, - }; - let otel_event_manager = ctx.client.get_otel_event_manager(); - let decision = select_sandbox( - &request, - AskForApproval::OnRequest, - Default::default(), - &cfg, - &session, - "sub", - "call", - &otel_event_manager, - ) - .await - .expect("ok"); - // On platforms with a sandbox, DangerFullAccess still prefers it - let expected = crate::safety::get_platform_sandbox().unwrap_or(SandboxType::None); - assert_eq!(decision.initial_sandbox, expected); - assert_eq!(decision.escalate_on_failure, false); - } - - #[tokio::test] - async fn select_apply_patch_requires_approval_on_unless_trusted() { - let (session, ctx) = make_session_and_context(); - let tempdir = tempfile::tempdir().expect("tmpdir"); - let p = tempdir.path().join("a.txt"); - let action = 
ApplyPatchAction::new_add_for_test(&p, "hello".to_string()); - let exec = ApplyPatchExec { - action, - user_explicitly_approved_this_action: false, - }; - let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None); - let request = ExecutionRequest { - params: ExecParams { - command: vec!["apply_patch".into()], - cwd: std::env::temp_dir(), - timeout_ms: None, - env: std::collections::HashMap::new(), - with_escalated_permissions: None, - justification: None, - }, - approval_command: vec!["apply_patch".into()], - mode: ExecutionMode::ApplyPatch(exec), - stdout_stream: None, - use_shell_profile: false, - }; - let otel_event_manager = ctx.client.get_otel_event_manager(); - let result = select_sandbox( - &request, - AskForApproval::UnlessTrusted, - Default::default(), - &cfg, - &session, - "sub", - "call", - &otel_event_manager, - ) - .await; - match result { - Ok(_) => panic!("expected error"), - Err(ExecError::Function(FunctionCallError::RespondToModel(msg))) => { - assert!(msg.contains("requires approval")) - } - Err(other) => panic!("unexpected error: {other:?}"), - } - } - - #[tokio::test] - async fn select_shell_autoapprove_in_danger_mode() { - let (session, ctx) = make_session_and_context(); - let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None); - let request = ExecutionRequest { - params: ExecParams { - command: vec!["some-unknown".into()], - cwd: std::env::temp_dir(), - timeout_ms: None, - env: std::collections::HashMap::new(), - with_escalated_permissions: None, - justification: None, - }, - approval_command: vec!["some-unknown".into()], - mode: ExecutionMode::Shell, - stdout_stream: None, - use_shell_profile: false, - }; - let otel_event_manager = ctx.client.get_otel_event_manager(); - let decision = select_sandbox( - &request, - AskForApproval::OnRequest, - Default::default(), - &cfg, - &session, - "sub", - "call", - &otel_event_manager, - ) - .await - .expect("ok"); - assert_eq!(decision.initial_sandbox, SandboxType::None); - assert_eq!(decision.escalate_on_failure, false); - } - - #[cfg(any(target_os = "macos", target_os = "linux"))] - #[tokio::test] - async fn select_shell_escalates_on_failure_with_platform_sandbox() { - let (session, ctx) = make_session_and_context(); - let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None); - let request = ExecutionRequest { - params: ExecParams { - // Unknown command => untrusted but not flagged dangerous - command: vec!["some-unknown".into()], - cwd: std::env::temp_dir(), - timeout_ms: None, - env: std::collections::HashMap::new(), - with_escalated_permissions: None, - justification: None, - }, - approval_command: vec!["some-unknown".into()], - mode: ExecutionMode::Shell, - stdout_stream: None, - use_shell_profile: false, - }; - let otel_event_manager = ctx.client.get_otel_event_manager(); - let decision = select_sandbox( - &request, - AskForApproval::OnFailure, - Default::default(), - &cfg, - &session, - "sub", - "call", - &otel_event_manager, - ) - .await - .expect("ok"); - // On macOS/Linux we should have a platform sandbox and escalate on failure - assert_ne!(decision.initial_sandbox, SandboxType::None); - assert_eq!(decision.escalate_on_failure, true); - } -} diff --git a/codex-rs/core/src/function_tool.rs b/codex-rs/core/src/function_tool.rs index a25fff61139..9d241655695 100644 --- a/codex-rs/core/src/function_tool.rs +++ b/codex-rs/core/src/function_tool.rs @@ -5,6 +5,7 @@ pub enum FunctionCallError { #[error("{0}")] RespondToModel(String), 
#[error("{0}")] + #[allow(dead_code)] // TODO(jif) fix in a follow-up PR Denied(String), #[error("LocalShellCall without call_id or id")] MissingLocalShellCallId, diff --git a/codex-rs/core/src/landlock.rs b/codex-rs/core/src/landlock.rs index 264ea747cab..340aebff2c1 100644 --- a/codex-rs/core/src/landlock.rs +++ b/codex-rs/core/src/landlock.rs @@ -40,7 +40,7 @@ where } /// Converts the sandbox policy into the CLI invocation for `codex-linux-sandbox`. -fn create_linux_sandbox_command_args( +pub(crate) fn create_linux_sandbox_command_args( command: Vec, sandbox_policy: &SandboxPolicy, sandbox_policy_cwd: &Path, @@ -56,7 +56,9 @@ fn create_linux_sandbox_command_args( serde_json::to_string(sandbox_policy).expect("Failed to serialize SandboxPolicy to JSON"); let mut linux_cmd: Vec = vec![ + "--sandbox-policy-cwd".to_string(), sandbox_policy_cwd, + "--sandbox-policy".to_string(), sandbox_policy_json, // Separator so that command arguments starting with `-` are not parsed as // options of the helper itself. diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 8ddf8bff9c8..fa307b1ea7d 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -25,9 +25,7 @@ pub mod custom_prompts; mod environment_context; pub mod error; pub mod exec; -mod exec_command; pub mod exec_env; -pub mod executor; pub mod features; mod flags; pub mod git_info; @@ -38,6 +36,7 @@ mod mcp_tool_call; mod message_history; mod model_provider_info; pub mod parse_command; +pub mod sandboxing; pub mod token_data; mod truncate; mod unified_exec; @@ -59,7 +58,6 @@ pub use auth::CodexAuth; pub mod default_client; pub mod model_family; mod openai_model_info; -mod openai_tools; pub mod project_doc; mod rollout; pub(crate) mod safety; diff --git a/codex-rs/core/src/openai_tools.rs b/codex-rs/core/src/openai_tools.rs deleted file mode 100644 index 0e10f909ecb..00000000000 --- a/codex-rs/core/src/openai_tools.rs +++ /dev/null @@ -1 +0,0 @@ -pub use crate::tools::spec::*; diff --git a/codex-rs/core/src/safety.rs b/codex-rs/core/src/safety.rs index 0ed0f929ff4..4492947d58d 100644 --- a/codex-rs/core/src/safety.rs +++ b/codex-rs/core/src/safety.rs @@ -1,4 +1,3 @@ -use std::collections::HashSet; use std::path::Component; use std::path::Path; use std::path::PathBuf; @@ -8,8 +7,6 @@ use codex_apply_patch::ApplyPatchFileChange; use crate::exec::SandboxType; -use crate::command_safety::is_dangerous_command::command_might_be_dangerous; -use crate::command_safety::is_safe_command::is_known_safe_command; use crate::protocol::AskForApproval; use crate::protocol::SandboxPolicy; @@ -48,30 +45,29 @@ pub fn assess_patch_safety( } } - // Even though the patch *appears* to be constrained to writable paths, it - // is possible that paths in the patch are hard links to files outside the - // writable roots, so we should still run `apply_patch` in a sandbox in that - // case. + // Even though the patch appears to be constrained to writable paths, it is + // possible that paths in the patch are hard links to files outside the + // writable roots, so we should still run `apply_patch` in a sandbox in that case. if is_write_patch_constrained_to_writable_paths(action, sandbox_policy, cwd) || policy == AskForApproval::OnFailure { - // Only auto‑approve when we can actually enforce a sandbox. Otherwise - // fall back to asking the user because the patch may touch arbitrary - // paths outside the project. 
- match get_platform_sandbox() { - Some(sandbox_type) => SafetyCheck::AutoApprove { - sandbox_type, + if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) { + // DangerFullAccess is intended to bypass sandboxing entirely. + SafetyCheck::AutoApprove { + sandbox_type: SandboxType::None, user_explicitly_approved: false, - }, - None if sandbox_policy == &SandboxPolicy::DangerFullAccess => { - // If the user has explicitly requested DangerFullAccess, then - // we can auto-approve even without a sandbox. - SafetyCheck::AutoApprove { - sandbox_type: SandboxType::None, + } + } else { + // Only auto‑approve when we can actually enforce a sandbox. Otherwise + // fall back to asking the user because the patch may touch arbitrary + // paths outside the project. + match get_platform_sandbox() { + Some(sandbox_type) => SafetyCheck::AutoApprove { + sandbox_type, user_explicitly_approved: false, - } + }, + None => SafetyCheck::AskUser, } - None => SafetyCheck::AskUser, } } else if policy == AskForApproval::Never { SafetyCheck::Reject { @@ -83,124 +79,6 @@ pub fn assess_patch_safety( } } -/// For a command to be run _without_ a sandbox, one of the following must be -/// true: -/// -/// - the user has explicitly approved the command -/// - the command is on the "known safe" list -/// - `DangerFullAccess` was specified and `UnlessTrusted` was not -pub fn assess_command_safety( - command: &[String], - approval_policy: AskForApproval, - sandbox_policy: &SandboxPolicy, - approved: &HashSet>, - with_escalated_permissions: bool, -) -> SafetyCheck { - // Some commands look dangerous. Even if they are run inside a sandbox, - // unless the user has explicitly approved them, we should ask, - // or reject if the approval_policy tells us not to ask. - if command_might_be_dangerous(command) && !approved.contains(command) { - if approval_policy == AskForApproval::Never { - return SafetyCheck::Reject { - reason: "dangerous command detected; rejected by user approval settings" - .to_string(), - }; - } - - return SafetyCheck::AskUser; - } - - // A command is "trusted" because either: - // - it belongs to a set of commands we consider "safe" by default, or - // - the user has explicitly approved the command for this session - // - // Currently, whether a command is "trusted" is a simple boolean, but we - // should include more metadata on this command test to indicate whether it - // should be run inside a sandbox or not. (This could be something the user - // defines as part of `execpolicy`.) - // - // For example, when `is_known_safe_command(command)` returns `true`, it - // would probably be fine to run the command in a sandbox, but when - // `approved.contains(command)` is `true`, the user may have approved it for - // the session _because_ they know it needs to run outside a sandbox. 
- - if is_known_safe_command(command) || approved.contains(command) { - let user_explicitly_approved = approved.contains(command); - return SafetyCheck::AutoApprove { - sandbox_type: SandboxType::None, - user_explicitly_approved, - }; - } - - assess_safety_for_untrusted_command(approval_policy, sandbox_policy, with_escalated_permissions) -} - -pub(crate) fn assess_safety_for_untrusted_command( - approval_policy: AskForApproval, - sandbox_policy: &SandboxPolicy, - with_escalated_permissions: bool, -) -> SafetyCheck { - use AskForApproval::*; - use SandboxPolicy::*; - - match (approval_policy, sandbox_policy) { - (UnlessTrusted, _) => { - // Even though the user may have opted into DangerFullAccess, - // they also requested that we ask for approval for untrusted - // commands. - SafetyCheck::AskUser - } - (OnFailure, DangerFullAccess) - | (Never, DangerFullAccess) - | (OnRequest, DangerFullAccess) => SafetyCheck::AutoApprove { - sandbox_type: SandboxType::None, - user_explicitly_approved: false, - }, - (OnRequest, ReadOnly) | (OnRequest, WorkspaceWrite { .. }) => { - if with_escalated_permissions { - SafetyCheck::AskUser - } else { - match get_platform_sandbox() { - Some(sandbox_type) => SafetyCheck::AutoApprove { - sandbox_type, - user_explicitly_approved: false, - }, - // Fall back to asking since the command is untrusted and - // we do not have a sandbox available - None => SafetyCheck::AskUser, - } - } - } - (Never, ReadOnly) - | (Never, WorkspaceWrite { .. }) - | (OnFailure, ReadOnly) - | (OnFailure, WorkspaceWrite { .. }) => { - match get_platform_sandbox() { - Some(sandbox_type) => SafetyCheck::AutoApprove { - sandbox_type, - user_explicitly_approved: false, - }, - None => { - if matches!(approval_policy, OnFailure) { - // Since the command is not trusted, even though the - // user has requested to only ask for approval on - // failure, we will ask the user because no sandbox is - // available. - SafetyCheck::AskUser - } else { - // We are in non-interactive mode and lack approval, so - // all we can do is reject the command. 
- SafetyCheck::Reject { - reason: "auto-rejected because command is not on trusted list" - .to_string(), - } - } - } - } - } - } -} - pub fn get_platform_sandbox() -> Option { if cfg!(target_os = "macos") { Some(SandboxType::MacosSeatbelt) @@ -339,101 +217,4 @@ mod tests { &cwd, )); } - - #[test] - fn test_request_escalated_privileges() { - // Should not be a trusted command - let command = vec!["git commit".to_string()]; - let approval_policy = AskForApproval::OnRequest; - let sandbox_policy = SandboxPolicy::ReadOnly; - let approved: HashSet> = HashSet::new(); - let request_escalated_privileges = true; - - let safety_check = assess_command_safety( - &command, - approval_policy, - &sandbox_policy, - &approved, - request_escalated_privileges, - ); - - assert_eq!(safety_check, SafetyCheck::AskUser); - } - - #[test] - fn dangerous_command_allowed_if_explicitly_approved() { - let command = vec!["git".to_string(), "reset".to_string(), "--hard".to_string()]; - let approval_policy = AskForApproval::OnRequest; - let sandbox_policy = SandboxPolicy::ReadOnly; - let mut approved: HashSet> = HashSet::new(); - approved.insert(command.clone()); - let request_escalated_privileges = false; - - let safety_check = assess_command_safety( - &command, - approval_policy, - &sandbox_policy, - &approved, - request_escalated_privileges, - ); - - assert_eq!( - safety_check, - SafetyCheck::AutoApprove { - sandbox_type: SandboxType::None, - user_explicitly_approved: true, - } - ); - } - - #[test] - fn dangerous_command_not_allowed_if_not_explicitly_approved() { - let command = vec!["git".to_string(), "reset".to_string(), "--hard".to_string()]; - let approval_policy = AskForApproval::Never; - let sandbox_policy = SandboxPolicy::ReadOnly; - let approved: HashSet> = HashSet::new(); - let request_escalated_privileges = false; - - let safety_check = assess_command_safety( - &command, - approval_policy, - &sandbox_policy, - &approved, - request_escalated_privileges, - ); - - assert_eq!( - safety_check, - SafetyCheck::Reject { - reason: "dangerous command detected; rejected by user approval settings" - .to_string(), - } - ); - } - - #[test] - fn test_request_escalated_privileges_no_sandbox_fallback() { - let command = vec!["git".to_string(), "commit".to_string()]; - let approval_policy = AskForApproval::OnRequest; - let sandbox_policy = SandboxPolicy::ReadOnly; - let approved: HashSet> = HashSet::new(); - let request_escalated_privileges = false; - - let safety_check = assess_command_safety( - &command, - approval_policy, - &sandbox_policy, - &approved, - request_escalated_privileges, - ); - - let expected = match get_platform_sandbox() { - Some(sandbox_type) => SafetyCheck::AutoApprove { - sandbox_type, - user_explicitly_approved: false, - }, - None => SafetyCheck::AskUser, - }; - assert_eq!(safety_check, expected); - } } diff --git a/codex-rs/core/src/sandboxing/mod.rs b/codex-rs/core/src/sandboxing/mod.rs new file mode 100644 index 00000000000..d632b5da19b --- /dev/null +++ b/codex-rs/core/src/sandboxing/mod.rs @@ -0,0 +1,170 @@ +/* +Module: sandboxing + +Build platform wrappers and produce ExecEnv for execution. Owns low‑level +sandbox placement and transformation of portable CommandSpec into a +ready‑to‑spawn environment. 
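A rough usage sketch of the module below (hypothetical internal caller; `policy: &SandboxPolicy` and `cwd: &Path` are assumed to be in scope, and only the types declared in this module are used):

let spec = CommandSpec {
    program: "rg".to_string(),
    args: vec!["TODO".to_string(), ".".to_string()],
    cwd: cwd.to_path_buf(),
    env: std::collections::HashMap::new(),
    timeout_ms: Some(10_000),
    with_escalated_permissions: None,
    justification: None,
};
let manager = SandboxManager::new();
let sandbox = manager.select_initial(policy, SandboxablePreference::Auto);
// The last argument is the optional path to the codex-linux-sandbox helper; it is
// required when `sandbox` is LinuxSeccomp and may be None otherwise.
let exec_env = manager.transform(&spec, policy, sandbox, cwd, None)?;
// exec_env.command now holds the fully wrapped argv (e.g. prefixed with
// /usr/bin/sandbox-exec on macOS), together with any sandbox-related env markers.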
+*/ +use crate::exec::ExecToolCallOutput; +use crate::exec::SandboxType; +use crate::exec::StdoutStream; +use crate::exec::execute_exec_env; +use crate::landlock::create_linux_sandbox_command_args; +use crate::protocol::SandboxPolicy; +use crate::seatbelt::MACOS_PATH_TO_SEATBELT_EXECUTABLE; +use crate::seatbelt::create_seatbelt_command_args; +use crate::spawn::CODEX_SANDBOX_ENV_VAR; +use crate::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; +use crate::tools::sandboxing::SandboxablePreference; +use std::collections::HashMap; +use std::path::Path; +use std::path::PathBuf; + +#[derive(Clone, Debug)] +pub struct CommandSpec { + pub program: String, + pub args: Vec, + pub cwd: PathBuf, + pub env: HashMap, + pub timeout_ms: Option, + pub with_escalated_permissions: Option, + pub justification: Option, +} + +#[derive(Clone, Debug)] +pub struct ExecEnv { + pub command: Vec, + pub cwd: PathBuf, + pub env: HashMap, + pub timeout_ms: Option, + pub sandbox: SandboxType, + pub with_escalated_permissions: Option, + pub justification: Option, + pub arg0: Option, +} + +pub enum SandboxPreference { + Auto, + Require, + Forbid, +} + +#[derive(Debug, thiserror::Error)] +pub(crate) enum SandboxTransformError { + #[error("missing codex-linux-sandbox executable path")] + MissingLinuxSandboxExecutable, +} + +#[derive(Default)] +pub struct SandboxManager; + +impl SandboxManager { + pub fn new() -> Self { + Self + } + + pub(crate) fn select_initial( + &self, + policy: &SandboxPolicy, + pref: SandboxablePreference, + ) -> SandboxType { + match pref { + SandboxablePreference::Forbid => SandboxType::None, + SandboxablePreference::Require => { + #[cfg(target_os = "macos")] + { + return SandboxType::MacosSeatbelt; + } + #[cfg(target_os = "linux")] + { + return SandboxType::LinuxSeccomp; + } + #[allow(unreachable_code)] + SandboxType::None + } + SandboxablePreference::Auto => match policy { + SandboxPolicy::DangerFullAccess => SandboxType::None, + #[cfg(target_os = "macos")] + _ => SandboxType::MacosSeatbelt, + #[cfg(target_os = "linux")] + _ => SandboxType::LinuxSeccomp, + #[cfg(not(any(target_os = "macos", target_os = "linux")))] + _ => SandboxType::None, + }, + } + } + + pub(crate) fn transform( + &self, + spec: &CommandSpec, + policy: &SandboxPolicy, + sandbox: SandboxType, + sandbox_policy_cwd: &Path, + codex_linux_sandbox_exe: Option<&PathBuf>, + ) -> Result { + let mut env = spec.env.clone(); + if !policy.has_full_network_access() { + env.insert( + CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR.to_string(), + "1".to_string(), + ); + } + + let mut command = Vec::with_capacity(1 + spec.args.len()); + command.push(spec.program.clone()); + command.extend(spec.args.iter().cloned()); + + let (command, sandbox_env, arg0_override) = match sandbox { + SandboxType::None => (command, HashMap::new(), None), + SandboxType::MacosSeatbelt => { + let mut seatbelt_env = HashMap::new(); + seatbelt_env.insert(CODEX_SANDBOX_ENV_VAR.to_string(), "seatbelt".to_string()); + let mut args = + create_seatbelt_command_args(command.clone(), policy, sandbox_policy_cwd); + let mut full_command = Vec::with_capacity(1 + args.len()); + full_command.push(MACOS_PATH_TO_SEATBELT_EXECUTABLE.to_string()); + full_command.append(&mut args); + (full_command, seatbelt_env, None) + } + SandboxType::LinuxSeccomp => { + let exe = codex_linux_sandbox_exe + .ok_or(SandboxTransformError::MissingLinuxSandboxExecutable)?; + let mut args = + create_linux_sandbox_command_args(command.clone(), policy, sandbox_policy_cwd); + let mut full_command = Vec::with_capacity(1 + 
args.len()); + full_command.push(exe.to_string_lossy().to_string()); + full_command.append(&mut args); + ( + full_command, + HashMap::new(), + Some("codex-linux-sandbox".to_string()), + ) + } + }; + + env.extend(sandbox_env); + + Ok(ExecEnv { + command, + cwd: spec.cwd.clone(), + env, + timeout_ms: spec.timeout_ms, + sandbox, + with_escalated_permissions: spec.with_escalated_permissions, + justification: spec.justification.clone(), + arg0: arg0_override, + }) + } + + pub fn denied(&self, sandbox: SandboxType, out: &ExecToolCallOutput) -> bool { + crate::exec::is_likely_sandbox_denied(sandbox, out) + } +} + +pub async fn execute_env( + env: &ExecEnv, + policy: &SandboxPolicy, + stdout_stream: Option, +) -> crate::error::Result { + execute_exec_env(env.clone(), policy, stdout_stream).await +} diff --git a/codex-rs/core/src/seatbelt.rs b/codex-rs/core/src/seatbelt.rs index 09e93668bcf..abd88d41bf2 100644 --- a/codex-rs/core/src/seatbelt.rs +++ b/codex-rs/core/src/seatbelt.rs @@ -14,7 +14,7 @@ const MACOS_SEATBELT_BASE_POLICY: &str = include_str!("seatbelt_base_policy.sbpl /// to defend against an attacker trying to inject a malicious version on the /// PATH. If /usr/bin/sandbox-exec has been tampered with, then the attacker /// already has root access. -const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec"; +pub(crate) const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec"; pub async fn spawn_command_under_seatbelt( command: Vec, @@ -39,7 +39,7 @@ pub async fn spawn_command_under_seatbelt( .await } -fn create_seatbelt_command_args( +pub(crate) fn create_seatbelt_command_args( command: Vec, sandbox_policy: &SandboxPolicy, sandbox_policy_cwd: &Path, diff --git a/codex-rs/core/src/shell.rs b/codex-rs/core/src/shell.rs index ac93d8b12e5..9a3f88976e5 100644 --- a/codex-rs/core/src/shell.rs +++ b/codex-rs/core/src/shell.rs @@ -347,6 +347,7 @@ mod tests { )]), with_escalated_permissions: None, justification: None, + arg0: None, }, SandboxType::None, &SandboxPolicy::DangerFullAccess, @@ -455,6 +456,7 @@ mod macos_tests { )]), with_escalated_permissions: None, justification: None, + arg0: None, }, SandboxType::None, &SandboxPolicy::DangerFullAccess, diff --git a/codex-rs/core/src/state/service.rs b/codex-rs/core/src/state/service.rs index b18f1b99405..ad6f5f90e81 100644 --- a/codex-rs/core/src/state/service.rs +++ b/codex-rs/core/src/state/service.rs @@ -2,9 +2,8 @@ use std::sync::Arc; use crate::AuthManager; use crate::RolloutRecorder; -use crate::exec_command::ExecSessionManager; -use crate::executor::Executor; use crate::mcp_connection_manager::McpConnectionManager; +use crate::tools::sandboxing::ApprovalStore; use crate::unified_exec::UnifiedExecSessionManager; use crate::user_notification::UserNotifier; use codex_otel::otel_event_manager::OtelEventManager; @@ -12,13 +11,12 @@ use tokio::sync::Mutex; pub(crate) struct SessionServices { pub(crate) mcp_connection_manager: McpConnectionManager, - pub(crate) session_manager: ExecSessionManager, pub(crate) unified_exec_manager: UnifiedExecSessionManager, pub(crate) notifier: UserNotifier, pub(crate) rollout: Mutex>, pub(crate) user_shell: crate::shell::Shell, pub(crate) show_raw_agent_reasoning: bool, - pub(crate) executor: Executor, pub(crate) auth_manager: Arc, pub(crate) otel_event_manager: OtelEventManager, + pub(crate) tool_approvals: Mutex, } diff --git a/codex-rs/core/src/tools/context.rs b/codex-rs/core/src/tools/context.rs index 7ab4691a554..a262404b05d 100644 --- a/codex-rs/core/src/tools/context.rs +++ 
b/codex-rs/core/src/tools/context.rs @@ -232,6 +232,7 @@ mod tests { } #[derive(Clone, Debug)] +#[allow(dead_code)] pub(crate) struct ExecCommandContext { pub(crate) sub_id: String, pub(crate) call_id: String, @@ -243,6 +244,7 @@ pub(crate) struct ExecCommandContext { } #[derive(Clone, Debug)] +#[allow(dead_code)] pub(crate) struct ApplyPatchCommandContext { pub(crate) user_explicitly_approved_this_action: bool, pub(crate) changes: HashMap, diff --git a/codex-rs/core/src/tools/events.rs b/codex-rs/core/src/tools/events.rs new file mode 100644 index 00000000000..e09f266bbe5 --- /dev/null +++ b/codex-rs/core/src/tools/events.rs @@ -0,0 +1,235 @@ +use crate::codex::Session; +use crate::exec::ExecToolCallOutput; +use crate::parse_command::parse_command; +use crate::protocol::Event; +use crate::protocol::EventMsg; +use crate::protocol::ExecCommandBeginEvent; +use crate::protocol::ExecCommandEndEvent; +use crate::protocol::FileChange; +use crate::protocol::PatchApplyBeginEvent; +use crate::protocol::PatchApplyEndEvent; +use crate::protocol::TurnDiffEvent; +use crate::tools::context::SharedTurnDiffTracker; +use std::collections::HashMap; +use std::path::PathBuf; +use std::time::Duration; + +use super::format_exec_output; +use super::format_exec_output_str; + +#[derive(Clone, Copy)] +pub(crate) struct ToolEventCtx<'a> { + pub session: &'a Session, + pub sub_id: &'a str, + pub call_id: &'a str, + pub turn_diff_tracker: Option<&'a SharedTurnDiffTracker>, +} + +impl<'a> ToolEventCtx<'a> { + pub fn new( + session: &'a Session, + sub_id: &'a str, + call_id: &'a str, + turn_diff_tracker: Option<&'a SharedTurnDiffTracker>, + ) -> Self { + Self { + session, + sub_id, + call_id, + turn_diff_tracker, + } + } +} + +pub(crate) enum ToolEventStage { + Begin, + Success(ExecToolCallOutput), + Failure(ToolEventFailure), +} + +pub(crate) enum ToolEventFailure { + Output(ExecToolCallOutput), + Message(String), +} +// Concrete, allocation-free emitter: avoid trait objects and boxed futures. +pub(crate) enum ToolEmitter { + Shell { + command: Vec, + cwd: PathBuf, + }, + ApplyPatch { + changes: HashMap, + auto_approved: bool, + }, +} + +impl ToolEmitter { + pub fn shell(command: Vec, cwd: PathBuf) -> Self { + Self::Shell { command, cwd } + } + + pub fn apply_patch(changes: HashMap, auto_approved: bool) -> Self { + Self::ApplyPatch { + changes, + auto_approved, + } + } + + pub async fn emit(&self, ctx: ToolEventCtx<'_>, stage: ToolEventStage) { + match (self, stage) { + (Self::Shell { command, cwd }, ToolEventStage::Begin) => { + ctx.session + .send_event(Event { + id: ctx.sub_id.to_string(), + msg: EventMsg::ExecCommandBegin(ExecCommandBeginEvent { + call_id: ctx.call_id.to_string(), + command: command.clone(), + cwd: cwd.clone(), + parsed_cmd: parse_command(command), + }), + }) + .await; + } + (Self::Shell { .. }, ToolEventStage::Success(output)) => { + emit_exec_end( + ctx, + output.stdout.text.clone(), + output.stderr.text.clone(), + output.aggregated_output.text.clone(), + output.exit_code, + output.duration, + format_exec_output_str(&output), + ) + .await; + } + (Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Output(output))) => { + emit_exec_end( + ctx, + output.stdout.text.clone(), + output.stderr.text.clone(), + output.aggregated_output.text.clone(), + output.exit_code, + output.duration, + format_exec_output_str(&output), + ) + .await; + } + (Self::Shell { .. 
}, ToolEventStage::Failure(ToolEventFailure::Message(message))) => { + emit_exec_end( + ctx, + String::new(), + (*message).to_string(), + (*message).to_string(), + -1, + Duration::ZERO, + format_exec_output(&message), + ) + .await; + } + + ( + Self::ApplyPatch { + changes, + auto_approved, + }, + ToolEventStage::Begin, + ) => { + if let Some(tracker) = ctx.turn_diff_tracker { + let mut guard = tracker.lock().await; + guard.on_patch_begin(changes); + } + ctx.session + .send_event(Event { + id: ctx.sub_id.to_string(), + msg: EventMsg::PatchApplyBegin(PatchApplyBeginEvent { + call_id: ctx.call_id.to_string(), + auto_approved: *auto_approved, + changes: changes.clone(), + }), + }) + .await; + } + (Self::ApplyPatch { .. }, ToolEventStage::Success(output)) => { + emit_patch_end( + ctx, + output.stdout.text.clone(), + output.stderr.text.clone(), + output.exit_code == 0, + ) + .await; + } + ( + Self::ApplyPatch { .. }, + ToolEventStage::Failure(ToolEventFailure::Output(output)), + ) => { + emit_patch_end( + ctx, + output.stdout.text.clone(), + output.stderr.text.clone(), + output.exit_code == 0, + ) + .await; + } + ( + Self::ApplyPatch { .. }, + ToolEventStage::Failure(ToolEventFailure::Message(message)), + ) => { + emit_patch_end(ctx, String::new(), (*message).to_string(), false).await; + } + } + } +} + +async fn emit_exec_end( + ctx: ToolEventCtx<'_>, + stdout: String, + stderr: String, + aggregated_output: String, + exit_code: i32, + duration: Duration, + formatted_output: String, +) { + ctx.session + .send_event(Event { + id: ctx.sub_id.to_string(), + msg: EventMsg::ExecCommandEnd(ExecCommandEndEvent { + call_id: ctx.call_id.to_string(), + stdout, + stderr, + aggregated_output, + exit_code, + duration, + formatted_output, + }), + }) + .await; +} + +async fn emit_patch_end(ctx: ToolEventCtx<'_>, stdout: String, stderr: String, success: bool) { + ctx.session + .send_event(Event { + id: ctx.sub_id.to_string(), + msg: EventMsg::PatchApplyEnd(PatchApplyEndEvent { + call_id: ctx.call_id.to_string(), + stdout, + stderr, + success, + }), + }) + .await; + + if let Some(tracker) = ctx.turn_diff_tracker { + let unified_diff = { + let mut guard = tracker.lock().await; + guard.get_unified_diff() + }; + if let Ok(Some(unified_diff)) = unified_diff { + ctx.session + .send_event(Event { + id: ctx.sub_id.to_string(), + msg: EventMsg::TurnDiff(TurnDiffEvent { unified_diff }), + }) + .await; + } + } +} diff --git a/codex-rs/core/src/tools/handlers/apply_patch.rs b/codex-rs/core/src/tools/handlers/apply_patch.rs index d85ac8b76f3..4223740c3a5 100644 --- a/codex-rs/core/src/tools/handlers/apply_patch.rs +++ b/codex-rs/core/src/tools/handlers/apply_patch.rs @@ -8,7 +8,6 @@ use crate::client_common::tools::ResponsesApiTool; use crate::client_common::tools::ToolSpec; use crate::exec::ExecParams; use crate::function_tool::FunctionCallError; -use crate::openai_tools::JsonSchema; use crate::tools::context::ToolInvocation; use crate::tools::context::ToolOutput; use crate::tools::context::ToolPayload; @@ -16,6 +15,7 @@ use crate::tools::handle_container_exec_with_params; use crate::tools::registry::ToolHandler; use crate::tools::registry::ToolKind; use crate::tools::spec::ApplyPatchToolArgs; +use crate::tools::spec::JsonSchema; use async_trait::async_trait; use serde::Deserialize; use serde::Serialize; @@ -72,6 +72,7 @@ impl ToolHandler for ApplyPatchHandler { env: HashMap::new(), with_escalated_permissions: None, justification: None, + arg0: None, }; let content = handle_container_exec_with_params( diff --git 
a/codex-rs/core/src/tools/handlers/exec_stream.rs b/codex-rs/core/src/tools/handlers/exec_stream.rs deleted file mode 100644 index 7f14c6737a4..00000000000 --- a/codex-rs/core/src/tools/handlers/exec_stream.rs +++ /dev/null @@ -1,68 +0,0 @@ -use async_trait::async_trait; - -use crate::exec_command::EXEC_COMMAND_TOOL_NAME; -use crate::exec_command::ExecCommandParams; -use crate::exec_command::WRITE_STDIN_TOOL_NAME; -use crate::exec_command::WriteStdinParams; -use crate::function_tool::FunctionCallError; -use crate::tools::context::ToolInvocation; -use crate::tools::context::ToolOutput; -use crate::tools::context::ToolPayload; -use crate::tools::registry::ToolHandler; -use crate::tools::registry::ToolKind; - -pub struct ExecStreamHandler; - -#[async_trait] -impl ToolHandler for ExecStreamHandler { - fn kind(&self) -> ToolKind { - ToolKind::Function - } - - async fn handle(&self, invocation: ToolInvocation) -> Result { - let ToolInvocation { - session, - tool_name, - payload, - .. - } = invocation; - - let arguments = match payload { - ToolPayload::Function { arguments } => arguments, - _ => { - return Err(FunctionCallError::RespondToModel( - "exec_stream handler received unsupported payload".to_string(), - )); - } - }; - - let content = match tool_name.as_str() { - EXEC_COMMAND_TOOL_NAME => { - let params: ExecCommandParams = serde_json::from_str(&arguments).map_err(|e| { - FunctionCallError::RespondToModel(format!( - "failed to parse function arguments: {e:?}" - )) - })?; - session.handle_exec_command_tool(params).await? - } - WRITE_STDIN_TOOL_NAME => { - let params: WriteStdinParams = serde_json::from_str(&arguments).map_err(|e| { - FunctionCallError::RespondToModel(format!( - "failed to parse function arguments: {e:?}" - )) - })?; - session.handle_write_stdin_tool(params).await? 
- } - _ => { - return Err(FunctionCallError::RespondToModel(format!( - "exec_stream handler does not support tool {tool_name}" - ))); - } - }; - - Ok(ToolOutput::Function { - content, - success: Some(true), - }) - } -} diff --git a/codex-rs/core/src/tools/handlers/mod.rs b/codex-rs/core/src/tools/handlers/mod.rs index 355e5d607a7..187b4444140 100644 --- a/codex-rs/core/src/tools/handlers/mod.rs +++ b/codex-rs/core/src/tools/handlers/mod.rs @@ -1,5 +1,4 @@ pub mod apply_patch; -mod exec_stream; mod grep_files; mod list_dir; mod mcp; @@ -14,7 +13,6 @@ mod view_image; pub use plan::PLAN_TOOL; pub use apply_patch::ApplyPatchHandler; -pub use exec_stream::ExecStreamHandler; pub use grep_files::GrepFilesHandler; pub use list_dir::ListDirHandler; pub use mcp::McpHandler; diff --git a/codex-rs/core/src/tools/handlers/plan.rs b/codex-rs/core/src/tools/handlers/plan.rs index 386933a52dd..9291443f08e 100644 --- a/codex-rs/core/src/tools/handlers/plan.rs +++ b/codex-rs/core/src/tools/handlers/plan.rs @@ -2,12 +2,12 @@ use crate::client_common::tools::ResponsesApiTool; use crate::client_common::tools::ToolSpec; use crate::codex::Session; use crate::function_tool::FunctionCallError; -use crate::openai_tools::JsonSchema; use crate::tools::context::ToolInvocation; use crate::tools::context::ToolOutput; use crate::tools::context::ToolPayload; use crate::tools::registry::ToolHandler; use crate::tools::registry::ToolKind; +use crate::tools::spec::JsonSchema; use async_trait::async_trait; use codex_protocol::plan_tool::UpdatePlanArgs; use codex_protocol::protocol::Event; diff --git a/codex-rs/core/src/tools/handlers/shell.rs b/codex-rs/core/src/tools/handlers/shell.rs index 1b27a58ef3c..168b8cd03e1 100644 --- a/codex-rs/core/src/tools/handlers/shell.rs +++ b/codex-rs/core/src/tools/handlers/shell.rs @@ -24,6 +24,7 @@ impl ShellHandler { env: create_env(&turn_context.shell_environment_policy), with_escalated_permissions: params.with_escalated_permissions, justification: params.justification, + arg0: None, } } } diff --git a/codex-rs/core/src/tools/handlers/unified_exec.rs b/codex-rs/core/src/tools/handlers/unified_exec.rs index ce47dded3c3..d171a1f7e33 100644 --- a/codex-rs/core/src/tools/handlers/unified_exec.rs +++ b/codex-rs/core/src/tools/handlers/unified_exec.rs @@ -35,7 +35,13 @@ impl ToolHandler for UnifiedExecHandler { async fn handle(&self, invocation: ToolInvocation) -> Result { let ToolInvocation { - session, payload, .. + session, + turn, + sub_id, + call_id, + tool_name: _tool_name, + payload, + .. 
} = invocation; let args = match payload { @@ -73,13 +79,23 @@ impl ToolHandler for UnifiedExecHandler { }; let request = UnifiedExecRequest { - session_id: parsed_session_id, input_chunks: &input, timeout_ms, }; let value = session - .run_unified_exec_request(request) + .services + .unified_exec_manager + .handle_request( + request, + crate::unified_exec::UnifiedExecContext { + session: &session, + turn: turn.as_ref(), + sub_id: &sub_id, + call_id: &call_id, + session_id: parsed_session_id, + }, + ) .await .map_err(|err| { FunctionCallError::RespondToModel(format!("unified exec failed: {err:?}")) diff --git a/codex-rs/core/src/tools/mod.rs b/codex-rs/core/src/tools/mod.rs index 607697e0831..c0e9a90fb7e 100644 --- a/codex-rs/core/src/tools/mod.rs +++ b/codex-rs/core/src/tools/mod.rs @@ -1,12 +1,15 @@ pub mod context; +pub mod events; pub(crate) mod handlers; +pub mod orchestrator; pub mod parallel; pub mod registry; pub mod router; +pub mod runtimes; +pub mod sandboxing; pub mod spec; use crate::apply_patch; -use crate::apply_patch::ApplyPatchExec; use crate::apply_patch::InternalApplyPatchInvocation; use crate::apply_patch::convert_apply_patch_to_protocol; use crate::codex::Session; @@ -15,14 +18,19 @@ use crate::error::CodexErr; use crate::error::SandboxErr; use crate::exec::ExecParams; use crate::exec::ExecToolCallOutput; -use crate::exec::StdoutStream; -use crate::executor::ExecutionMode; -use crate::executor::errors::ExecError; -use crate::executor::linkers::PreparedExec; use crate::function_tool::FunctionCallError; -use crate::tools::context::ApplyPatchCommandContext; -use crate::tools::context::ExecCommandContext; use crate::tools::context::SharedTurnDiffTracker; +use crate::tools::events::ToolEmitter; +use crate::tools::events::ToolEventCtx; +use crate::tools::events::ToolEventFailure; +use crate::tools::events::ToolEventStage; +use crate::tools::orchestrator::ToolOrchestrator; +use crate::tools::runtimes::apply_patch::ApplyPatchRequest; +use crate::tools::runtimes::apply_patch::ApplyPatchRuntime; +use crate::tools::runtimes::shell::ShellRequest; +use crate::tools::runtimes::shell::ShellRuntime; +use crate::tools::sandboxing::ToolCtx; +use crate::tools::sandboxing::ToolError; use codex_apply_patch::MaybeApplyPatchVerified; use codex_apply_patch::maybe_parse_apply_patch_verified; use codex_protocol::protocol::AskForApproval; @@ -56,7 +64,7 @@ pub(crate) async fn handle_container_exec_with_params( sub_id: String, call_id: String, ) -> Result { - let otel_event_manager = turn_context.client.get_otel_event_manager(); + let _otel_event_manager = turn_context.client.get_otel_event_manager(); if params.with_escalated_permissions.unwrap_or(false) && !matches!(turn_context.approval_policy, AskForApproval::OnRequest) @@ -100,86 +108,142 @@ pub(crate) async fn handle_container_exec_with_params( MaybeApplyPatchVerified::NotApplyPatch => None, }; - let command_for_display = if let Some(exec) = apply_patch_exec.as_ref() { - vec!["apply_patch".to_string(), exec.action.patch.clone()] - } else { - params.command.clone() - }; - - let exec_command_context = ExecCommandContext { - sub_id: sub_id.clone(), - call_id: call_id.clone(), - command_for_display: command_for_display.clone(), - cwd: params.cwd.clone(), - apply_patch: apply_patch_exec.as_ref().map( - |ApplyPatchExec { - action, - user_explicitly_approved_this_action, - }| ApplyPatchCommandContext { - user_explicitly_approved_this_action: *user_explicitly_approved_this_action, - changes: convert_apply_patch_to_protocol(action), - }, + let 
(event_emitter, diff_opt) = match apply_patch_exec.as_ref() { + Some(exec) => ( + ToolEmitter::apply_patch( + convert_apply_patch_to_protocol(&exec.action), + !exec.user_explicitly_approved_this_action, + ), + Some(&turn_diff_tracker), + ), + None => ( + ToolEmitter::shell(params.command.clone(), params.cwd.clone()), + None, ), - tool_name: tool_name.to_string(), - otel_event_manager, }; - let mode = match apply_patch_exec { - Some(exec) => ExecutionMode::ApplyPatch(exec), - None => ExecutionMode::Shell, - }; + let event_ctx = ToolEventCtx::new(sess.as_ref(), &sub_id, &call_id, diff_opt); + event_emitter.emit(event_ctx, ToolEventStage::Begin).await; + + // Build runtime contexts only when needed (shell/apply_patch below). + + if let Some(exec) = apply_patch_exec { + // Route apply_patch execution through the new orchestrator/runtime. + let req = ApplyPatchRequest { + patch: exec.action.patch.clone(), + cwd: params.cwd.clone(), + timeout_ms: params.timeout_ms, + user_explicitly_approved: exec.user_explicitly_approved_this_action, + codex_exe: turn_context.codex_linux_sandbox_exe.clone(), + }; + + let mut orchestrator = ToolOrchestrator::new(); + let mut runtime = ApplyPatchRuntime::new(); + let tool_ctx = ToolCtx { + session: sess.as_ref(), + sub_id: sub_id.clone(), + call_id: call_id.clone(), + tool_name: tool_name.to_string(), + }; + + let out = orchestrator + .run( + &mut runtime, + &req, + &tool_ctx, + &turn_context, + turn_context.approval_policy, + ) + .await; - sess.services.executor.update_environment( - turn_context.sandbox_policy.clone(), - turn_context.cwd.clone(), - ); - - let prepared_exec = PreparedExec::new( - exec_command_context, - params, - command_for_display, - mode, - Some(StdoutStream { + handle_exec_outcome(&event_emitter, event_ctx, out).await + } else { + // Route shell execution through the new orchestrator/runtime. + let req = ShellRequest { + command: params.command.clone(), + cwd: params.cwd.clone(), + timeout_ms: params.timeout_ms, + env: params.env.clone(), + with_escalated_permissions: params.with_escalated_permissions, + justification: params.justification.clone(), + }; + + let mut orchestrator = ToolOrchestrator::new(); + let mut runtime = ShellRuntime::new(); + let tool_ctx = ToolCtx { + session: sess.as_ref(), sub_id: sub_id.clone(), call_id: call_id.clone(), - tx_event: sess.get_tx_event(), - }), - turn_context.shell_environment_policy.use_profile, - ); - - let output_result = sess - .run_exec_with_events( - turn_diff_tracker.clone(), - prepared_exec, - turn_context.approval_policy, - ) - .await; + tool_name: tool_name.to_string(), + }; + + let out = orchestrator + .run( + &mut runtime, + &req, + &tool_ctx, + &turn_context, + turn_context.approval_policy, + ) + .await; + + handle_exec_outcome(&event_emitter, event_ctx, out).await + } +} - // always make sure to truncate the output if its length isn't controlled. - match output_result { +async fn handle_exec_outcome( + event_emitter: &ToolEmitter, + event_ctx: ToolEventCtx<'_>, + out: Result, +) -> Result { + let event; + let result = match out { Ok(output) => { - let ExecToolCallOutput { exit_code, .. 
} = &output; - let content = format_exec_output_apply_patch(&output); - if *exit_code == 0 { + let content = format_exec_output_for_model(&output); + let exit_code = output.exit_code; + event = ToolEventStage::Success(output); + if exit_code == 0 { Ok(content) } else { Err(FunctionCallError::RespondToModel(content)) } } - Err(ExecError::Function(err)) => Err(truncate_function_error(err)), - Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => Err( - FunctionCallError::RespondToModel(format_exec_output_apply_patch(&output)), - ), - Err(ExecError::Codex(err)) => { + Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) + | Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => { + let response = format_exec_output_for_model(&output); + event = ToolEventStage::Failure(ToolEventFailure::Output(*output)); + Err(FunctionCallError::RespondToModel(response)) + } + Err(ToolError::Codex(err)) => { let message = format!("execution error: {err:?}"); + let response = format_exec_output(&message); + event = ToolEventStage::Failure(ToolEventFailure::Message(message)); Err(FunctionCallError::RespondToModel(format_exec_output( - &message, + &response, ))) } - } + Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => { + // Normalize common rejection messages for exec tools so tests and + // users see a clear, consistent phrase. + let normalized = if msg == "rejected by user" { + "exec command rejected by user".to_string() + } else { + msg + }; + let response = format_exec_output(&normalized); + event = ToolEventStage::Failure(ToolEventFailure::Message(normalized)); + Err(FunctionCallError::RespondToModel(format_exec_output( + &response, + ))) + } + }; + event_emitter.emit(event_ctx, event).await; + result } -pub fn format_exec_output_apply_patch(exec_output: &ExecToolCallOutput) -> String { +/// Format the combined exec output for sending back to the model. +/// Includes exit code and duration metadata; truncates large bodies safely. +pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String { let ExecToolCallOutput { exit_code, duration, @@ -233,18 +297,7 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { format_exec_output(content) } -fn truncate_function_error(err: FunctionCallError) -> FunctionCallError { - match err { - FunctionCallError::RespondToModel(msg) => { - FunctionCallError::RespondToModel(format_exec_output(&msg)) - } - FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)), - FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)), - other => other, - } -} - -fn format_exec_output(content: &str) -> String { +pub(super) fn format_exec_output(content: &str) -> String { // Head+tail truncation for the model: show the beginning and end with an elision. // Clients still receive full streams; only this formatted summary is capped. 
let total_lines = content.lines().count(); @@ -315,6 +368,17 @@ mod tests { use super::*; use regex_lite::Regex; + fn truncate_function_error(err: FunctionCallError) -> FunctionCallError { + match err { + FunctionCallError::RespondToModel(msg) => { + FunctionCallError::RespondToModel(format_exec_output(&msg)) + } + FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)), + FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)), + other => other, + } + } + fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) { let pattern = truncated_message_pattern(line, total_lines); let regex = Regex::new(&pattern).unwrap_or_else(|err| { diff --git a/codex-rs/core/src/tools/orchestrator.rs b/codex-rs/core/src/tools/orchestrator.rs new file mode 100644 index 00000000000..4d06163323e --- /dev/null +++ b/codex-rs/core/src/tools/orchestrator.rs @@ -0,0 +1,172 @@ +/* +Module: orchestrator + +Central place for approvals + sandbox selection + retry semantics. Drives a +simple sequence for any ToolRuntime: approval → select sandbox → attempt → +retry without sandbox on denial (no re‑approval thanks to caching). +*/ +use crate::error::CodexErr; +use crate::error::SandboxErr; +use crate::exec::ExecToolCallOutput; +use crate::sandboxing::SandboxManager; +use crate::tools::sandboxing::ApprovalCtx; +use crate::tools::sandboxing::SandboxAttempt; +use crate::tools::sandboxing::ToolCtx; +use crate::tools::sandboxing::ToolError; +use crate::tools::sandboxing::ToolRuntime; +use codex_protocol::protocol::AskForApproval; +use codex_protocol::protocol::ReviewDecision; + +pub(crate) struct ToolOrchestrator { + sandbox: SandboxManager, +} + +impl ToolOrchestrator { + pub fn new() -> Self { + Self { + sandbox: SandboxManager::new(), + } + } + + pub async fn run( + &mut self, + tool: &mut T, + req: &Rq, + tool_ctx: &ToolCtx<'_>, + turn_ctx: &crate::codex::TurnContext, + approval_policy: AskForApproval, + ) -> Result + where + T: ToolRuntime, + { + let otel = turn_ctx.client.get_otel_event_manager(); + let otel_tn = &tool_ctx.tool_name; + let otel_ci = &tool_ctx.call_id; + let otel_user = codex_otel::otel_event_manager::ToolDecisionSource::User; + let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config; + + // 1) Approval + let needs_initial_approval = + tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy); + let mut already_approved = false; + + if needs_initial_approval { + let approval_ctx = ApprovalCtx { + session: tool_ctx.session, + sub_id: &tool_ctx.sub_id, + call_id: &tool_ctx.call_id, + retry_reason: None, + }; + let decision = tool.start_approval_async(req, approval_ctx).await; + + otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone()); + + match decision { + ReviewDecision::Denied | ReviewDecision::Abort => { + return Err(ToolError::Rejected("rejected by user".to_string())); + } + ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {} + } + already_approved = true; + } else { + otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg); + } + + // 2) First attempt under the selected sandbox. 
+ let mut initial_sandbox = self + .sandbox + .select_initial(&turn_ctx.sandbox_policy, tool.sandbox_preference()); + if tool.wants_escalated_first_attempt(req) { + initial_sandbox = crate::exec::SandboxType::None; + } + let initial_attempt = SandboxAttempt { + sandbox: initial_sandbox, + policy: &turn_ctx.sandbox_policy, + manager: &self.sandbox, + sandbox_cwd: &turn_ctx.cwd, + codex_linux_sandbox_exe: turn_ctx.codex_linux_sandbox_exe.as_ref(), + }; + + match tool.run(req, &initial_attempt, tool_ctx).await { + Ok(out) => { + // We have a successful initial result + Ok(out) + } + Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => { + if !tool.escalate_on_failure() { + return Err(ToolError::SandboxDenied( + "sandbox denied and no retry".to_string(), + )); + } + // Under `Never`, do not retry without sandbox; surface a concise message + // derived from the actual output (platform-agnostic). + if matches!(approval_policy, AskForApproval::Never) { + let msg = build_never_denied_message_from_output(output.as_ref()); + return Err(ToolError::SandboxDenied(msg)); + } + + // Ask for approval before retrying without sandbox. + if !tool.should_bypass_approval(approval_policy, already_approved) { + let reason_msg = build_denial_reason_from_output(output.as_ref()); + let approval_ctx = ApprovalCtx { + session: tool_ctx.session, + sub_id: &tool_ctx.sub_id, + call_id: &tool_ctx.call_id, + retry_reason: Some(reason_msg), + }; + + let decision = tool.start_approval_async(req, approval_ctx).await; + otel.tool_decision(otel_tn, otel_ci, decision, otel_user); + + match decision { + ReviewDecision::Denied | ReviewDecision::Abort => { + return Err(ToolError::Rejected("rejected by user".to_string())); + } + ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {} + } + } + + let escalated_attempt = SandboxAttempt { + sandbox: crate::exec::SandboxType::None, + policy: &turn_ctx.sandbox_policy, + manager: &self.sandbox, + sandbox_cwd: &turn_ctx.cwd, + codex_linux_sandbox_exe: None, + }; + + // Second attempt. + (*tool).run(req, &escalated_attempt, tool_ctx).await + } + other => other, + } + } +} + +fn build_never_denied_message_from_output(output: &ExecToolCallOutput) -> String { + let body = format!( + "{}\n{}\n{}", + output.stderr.text, output.stdout.text, output.aggregated_output.text + ) + .to_lowercase(); + + let detail = if body.contains("permission denied") { + Some("Permission denied") + } else if body.contains("operation not permitted") { + Some("Operation not permitted") + } else if body.contains("read-only file system") { + Some("Read-only file system") + } else { + None + }; + + match detail { + Some(tag) => format!("failed in sandbox: {tag}"), + None => "failed in sandbox".to_string(), + } +} + +fn build_denial_reason_from_output(_output: &ExecToolCallOutput) -> String { + // Keep approval reason terse and stable for UX/tests, but accept the + // output so we can evolve heuristics later without touching call sites. + "command failed; retry without sandbox?".to_string() +} diff --git a/codex-rs/core/src/tools/runtimes/apply_patch.rs b/codex-rs/core/src/tools/runtimes/apply_patch.rs new file mode 100644 index 00000000000..f6062a3a9cc --- /dev/null +++ b/codex-rs/core/src/tools/runtimes/apply_patch.rs @@ -0,0 +1,148 @@ +//! Apply Patch runtime: executes verified patches under the orchestrator. +//! +//! Assumes `apply_patch` verification/approval happened upstream. Reuses that +//! decision to avoid re-prompting, builds the self-invocation command for +//! 
`codex --codex-run-as-apply-patch`, and runs under the current
+//! `SandboxAttempt` with a minimal environment.
+use crate::CODEX_APPLY_PATCH_ARG1;
+use crate::exec::ExecToolCallOutput;
+use crate::sandboxing::CommandSpec;
+use crate::sandboxing::execute_env;
+use crate::tools::sandboxing::Approvable;
+use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::Sandboxable;
+use crate::tools::sandboxing::SandboxablePreference;
+use crate::tools::sandboxing::ToolCtx;
+use crate::tools::sandboxing::ToolError;
+use crate::tools::sandboxing::ToolRuntime;
+use crate::tools::sandboxing::with_cached_approval;
+use codex_protocol::protocol::ReviewDecision;
+use futures::future::BoxFuture;
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+#[derive(Clone, Debug)]
+pub struct ApplyPatchRequest {
+    pub patch: String,
+    pub cwd: PathBuf,
+    pub timeout_ms: Option<u64>,
+    pub user_explicitly_approved: bool,
+    pub codex_exe: Option<PathBuf>,
+}
+
+#[derive(Default)]
+pub struct ApplyPatchRuntime;
+
+#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
+pub(crate) struct ApprovalKey {
+    patch: String,
+    cwd: PathBuf,
+}
+
+impl ApplyPatchRuntime {
+    pub fn new() -> Self {
+        Self
+    }
+
+    fn build_command_spec(req: &ApplyPatchRequest) -> Result<CommandSpec, ToolError> {
+        use std::env;
+        let exe = if let Some(path) = &req.codex_exe {
+            path.clone()
+        } else {
+            env::current_exe()
+                .map_err(|e| ToolError::Rejected(format!("failed to determine codex exe: {e}")))?
+        };
+        let program = exe.to_string_lossy().to_string();
+        Ok(CommandSpec {
+            program,
+            args: vec![CODEX_APPLY_PATCH_ARG1.to_string(), req.patch.clone()],
+            cwd: req.cwd.clone(),
+            timeout_ms: req.timeout_ms,
+            // Run apply_patch with a minimal environment for determinism and to avoid leaks.
+ env: HashMap::new(), + with_escalated_permissions: None, + justification: None, + }) + } + + fn stdout_stream(ctx: &ToolCtx<'_>) -> Option { + Some(crate::exec::StdoutStream { + sub_id: ctx.sub_id.clone(), + call_id: ctx.call_id.clone(), + tx_event: ctx.session.get_tx_event(), + }) + } +} + +impl Sandboxable for ApplyPatchRuntime { + fn sandbox_preference(&self) -> SandboxablePreference { + SandboxablePreference::Auto + } + fn escalate_on_failure(&self) -> bool { + true + } +} + +impl Approvable for ApplyPatchRuntime { + type ApprovalKey = ApprovalKey; + + fn approval_key(&self, req: &ApplyPatchRequest) -> Self::ApprovalKey { + ApprovalKey { + patch: req.patch.clone(), + cwd: req.cwd.clone(), + } + } + + fn start_approval_async<'a>( + &'a mut self, + req: &'a ApplyPatchRequest, + ctx: ApprovalCtx<'a>, + ) -> BoxFuture<'a, ReviewDecision> { + let key = self.approval_key(req); + let session = ctx.session; + let sub_id = ctx.sub_id.to_string(); + let call_id = ctx.call_id.to_string(); + let cwd = req.cwd.clone(); + let retry_reason = ctx.retry_reason.clone(); + let user_explicitly_approved = req.user_explicitly_approved; + Box::pin(async move { + with_cached_approval(&session.services, key, || async move { + if let Some(reason) = retry_reason { + session + .request_command_approval( + sub_id, + call_id, + vec!["apply_patch".to_string()], + cwd, + Some(reason), + ) + .await + } else if user_explicitly_approved { + ReviewDecision::ApprovedForSession + } else { + ReviewDecision::Approved + } + }) + .await + }) + } +} + +impl ToolRuntime for ApplyPatchRuntime { + async fn run( + &mut self, + req: &ApplyPatchRequest, + attempt: &SandboxAttempt<'_>, + ctx: &ToolCtx<'_>, + ) -> Result { + let spec = Self::build_command_spec(req)?; + let env = attempt + .env_for(&spec) + .map_err(|err| ToolError::Codex(err.into()))?; + let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx)) + .await + .map_err(ToolError::Codex)?; + Ok(out) + } +} diff --git a/codex-rs/core/src/tools/runtimes/mod.rs b/codex-rs/core/src/tools/runtimes/mod.rs new file mode 100644 index 00000000000..212163d72cb --- /dev/null +++ b/codex-rs/core/src/tools/runtimes/mod.rs @@ -0,0 +1,38 @@ +/* +Module: runtimes + +Concrete ToolRuntime implementations for specific tools. Each runtime stays +small and focused and reuses the orchestrator for approvals + sandbox + retry. +*/ +use crate::sandboxing::CommandSpec; +use crate::tools::sandboxing::ToolError; +use std::collections::HashMap; +use std::path::Path; + +pub mod apply_patch; +pub mod shell; +pub mod unified_exec; + +/// Shared helper to construct a CommandSpec from a tokenized command line. +/// Validates that at least a program is present. 
+pub(crate) fn build_command_spec(
+    command: &[String],
+    cwd: &Path,
+    env: &HashMap<String, String>,
+    timeout_ms: Option<u64>,
+    with_escalated_permissions: Option<bool>,
+    justification: Option<String>,
+) -> Result<CommandSpec, ToolError> {
+    let (program, args) = command
+        .split_first()
+        .ok_or_else(|| ToolError::Rejected("command args are empty".to_string()))?;
+    Ok(CommandSpec {
+        program: program.clone(),
+        args: args.to_vec(),
+        cwd: cwd.to_path_buf(),
+        env: env.clone(),
+        timeout_ms,
+        with_escalated_permissions,
+        justification,
+    })
+}
diff --git a/codex-rs/core/src/tools/runtimes/shell.rs b/codex-rs/core/src/tools/runtimes/shell.rs
new file mode 100644
index 00000000000..313e9e07006
--- /dev/null
+++ b/codex-rs/core/src/tools/runtimes/shell.rs
@@ -0,0 +1,164 @@
+/*
+Runtime: shell
+
+Executes shell requests under the orchestrator: asks for approval when needed,
+builds a CommandSpec, and runs it under the current SandboxAttempt.
+*/
+use crate::command_safety::is_dangerous_command::command_might_be_dangerous;
+use crate::command_safety::is_safe_command::is_known_safe_command;
+use crate::exec::ExecToolCallOutput;
+use crate::protocol::SandboxPolicy;
+use crate::sandboxing::execute_env;
+use crate::tools::runtimes::build_command_spec;
+use crate::tools::sandboxing::Approvable;
+use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::Sandboxable;
+use crate::tools::sandboxing::SandboxablePreference;
+use crate::tools::sandboxing::ToolCtx;
+use crate::tools::sandboxing::ToolError;
+use crate::tools::sandboxing::ToolRuntime;
+use crate::tools::sandboxing::with_cached_approval;
+use codex_protocol::protocol::AskForApproval;
+use codex_protocol::protocol::ReviewDecision;
+use futures::future::BoxFuture;
+use std::path::PathBuf;
+
+#[derive(Clone, Debug)]
+pub struct ShellRequest {
+    pub command: Vec<String>,
+    pub cwd: PathBuf,
+    pub timeout_ms: Option<u64>,
+    pub env: std::collections::HashMap<String, String>,
+    pub with_escalated_permissions: Option<bool>,
+    pub justification: Option<String>,
+}
+
+#[derive(Default)]
+pub struct ShellRuntime;
+
+#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
+pub(crate) struct ApprovalKey {
+    command: Vec<String>,
+    cwd: PathBuf,
+    escalated: bool,
+}
+
+impl ShellRuntime {
+    pub fn new() -> Self {
+        Self
+    }
+
+    fn stdout_stream(ctx: &ToolCtx<'_>) -> Option<crate::exec::StdoutStream> {
+        Some(crate::exec::StdoutStream {
+            sub_id: ctx.sub_id.clone(),
+            call_id: ctx.call_id.clone(),
+            tx_event: ctx.session.get_tx_event(),
+        })
+    }
+}
+
+impl Sandboxable for ShellRuntime {
+    fn sandbox_preference(&self) -> SandboxablePreference {
+        SandboxablePreference::Auto
+    }
+    fn escalate_on_failure(&self) -> bool {
+        true
+    }
+}
+
+impl Approvable for ShellRuntime {
+    type ApprovalKey = ApprovalKey;
+
+    fn approval_key(&self, req: &ShellRequest) -> Self::ApprovalKey {
+        ApprovalKey {
+            command: req.command.clone(),
+            cwd: req.cwd.clone(),
+            escalated: req.with_escalated_permissions.unwrap_or(false),
+        }
+    }
+
+    fn start_approval_async<'a>(
+        &'a mut self,
+        req: &'a ShellRequest,
+        ctx: ApprovalCtx<'a>,
+    ) -> BoxFuture<'a, ReviewDecision> {
+        let key = self.approval_key(req);
+        let command = req.command.clone();
+        let cwd = req.cwd.clone();
+        let reason = ctx
+            .retry_reason
+            .clone()
+            .or_else(|| req.justification.clone());
+        let session = ctx.session;
+        let sub_id = ctx.sub_id.to_string();
+        let call_id = ctx.call_id.to_string();
+        Box::pin(async move {
+            with_cached_approval(&session.services, key, || async move {
+                session
+                    .request_command_approval(sub_id, call_id, command, cwd, reason)
+                    .await
+            })
+ .await + }) + } + + fn wants_initial_approval( + &self, + req: &ShellRequest, + policy: AskForApproval, + sandbox_policy: &SandboxPolicy, + ) -> bool { + if is_known_safe_command(&req.command) { + return false; + } + match policy { + AskForApproval::Never | AskForApproval::OnFailure => false, + AskForApproval::OnRequest => { + // In DangerFullAccess, only prompt if the command looks dangerous. + if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) { + return command_might_be_dangerous(&req.command); + } + + // In restricted sandboxes (ReadOnly/WorkspaceWrite), do not prompt for + // non‑escalated, non‑dangerous commands — let the sandbox enforce + // restrictions (e.g., block network/write) without a user prompt. + let wants_escalation = req.with_escalated_permissions.unwrap_or(false); + if wants_escalation { + return true; + } + command_might_be_dangerous(&req.command) + } + AskForApproval::UnlessTrusted => !is_known_safe_command(&req.command), + } + } + + fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool { + req.with_escalated_permissions.unwrap_or(false) + } +} + +impl ToolRuntime for ShellRuntime { + async fn run( + &mut self, + req: &ShellRequest, + attempt: &SandboxAttempt<'_>, + ctx: &ToolCtx<'_>, + ) -> Result { + let spec = build_command_spec( + &req.command, + &req.cwd, + &req.env, + req.timeout_ms, + req.with_escalated_permissions, + req.justification.clone(), + )?; + let env = attempt + .env_for(&spec) + .map_err(|err| ToolError::Codex(err.into()))?; + let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx)) + .await + .map_err(ToolError::Codex)?; + Ok(out) + } +} diff --git a/codex-rs/core/src/tools/runtimes/unified_exec.rs b/codex-rs/core/src/tools/runtimes/unified_exec.rs new file mode 100644 index 00000000000..e642105551c --- /dev/null +++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs @@ -0,0 +1,123 @@ +/* +Runtime: unified exec + +Handles approval + sandbox orchestration for unified exec requests, delegating to +the session manager to spawn PTYs once an ExecEnv is prepared. 
+*/
+use crate::error::CodexErr;
+use crate::error::SandboxErr;
+use crate::tools::runtimes::build_command_spec;
+use crate::tools::sandboxing::Approvable;
+use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::Sandboxable;
+use crate::tools::sandboxing::SandboxablePreference;
+use crate::tools::sandboxing::ToolCtx;
+use crate::tools::sandboxing::ToolError;
+use crate::tools::sandboxing::ToolRuntime;
+use crate::tools::sandboxing::with_cached_approval;
+use crate::unified_exec::UnifiedExecError;
+use crate::unified_exec::UnifiedExecSession;
+use crate::unified_exec::UnifiedExecSessionManager;
+use codex_protocol::protocol::ReviewDecision;
+use futures::future::BoxFuture;
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+#[derive(Clone, Debug)]
+pub struct UnifiedExecRequest {
+    pub command: Vec<String>,
+    pub cwd: PathBuf,
+    pub env: HashMap<String, String>,
+}
+
+#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
+pub struct UnifiedExecApprovalKey {
+    pub command: Vec<String>,
+    pub cwd: PathBuf,
+}
+
+pub struct UnifiedExecRuntime<'a> {
+    manager: &'a UnifiedExecSessionManager,
+}
+
+impl UnifiedExecRequest {
+    pub fn new(command: Vec<String>, cwd: PathBuf, env: HashMap<String, String>) -> Self {
+        Self { command, cwd, env }
+    }
+}
+
+impl<'a> UnifiedExecRuntime<'a> {
+    pub fn new(manager: &'a UnifiedExecSessionManager) -> Self {
+        Self { manager }
+    }
+}
+
+impl Sandboxable for UnifiedExecRuntime<'_> {
+    fn sandbox_preference(&self) -> SandboxablePreference {
+        SandboxablePreference::Auto
+    }
+
+    fn escalate_on_failure(&self) -> bool {
+        true
+    }
+}
+
+impl Approvable for UnifiedExecRuntime<'_> {
+    type ApprovalKey = UnifiedExecApprovalKey;
+
+    fn approval_key(&self, req: &UnifiedExecRequest) -> Self::ApprovalKey {
+        UnifiedExecApprovalKey {
+            command: req.command.clone(),
+            cwd: req.cwd.clone(),
+        }
+    }
+
+    fn start_approval_async<'b>(
+        &'b mut self,
+        req: &'b UnifiedExecRequest,
+        ctx: ApprovalCtx<'b>,
+    ) -> BoxFuture<'b, ReviewDecision> {
+        let key = self.approval_key(req);
+        let session = ctx.session;
+        let sub_id = ctx.sub_id.to_string();
+        let call_id = ctx.call_id.to_string();
+        let command = req.command.clone();
+        let cwd = req.cwd.clone();
+        let reason = ctx.retry_reason.clone();
+        Box::pin(async move {
+            with_cached_approval(&session.services, key, || async move {
+                session
+                    .request_command_approval(sub_id, call_id, command, cwd, reason)
+                    .await
+            })
+            .await
+        })
+    }
+}
+
+impl<'a> ToolRuntime for UnifiedExecRuntime<'a> {
+    async fn run(
+        &mut self,
+        req: &UnifiedExecRequest,
+        attempt: &SandboxAttempt<'_>,
+        _ctx: &ToolCtx<'_>,
+    ) -> Result<UnifiedExecSession, ToolError> {
+        let spec = build_command_spec(&req.command, &req.cwd, &req.env, None, None, None)
+            .map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
+        let exec_env = attempt
+            .env_for(&spec)
+            .map_err(|err| ToolError::Codex(err.into()))?;
+        self.manager
+            .open_session_with_exec_env(&exec_env)
+            .await
+            .map_err(|err| match err {
+                UnifiedExecError::SandboxDenied { output, .. } => {
+                    ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied {
+                        output: Box::new(output),
+                    }))
+                }
+                other => ToolError::Rejected(other.to_string()),
+            })
+    }
+}
diff --git a/codex-rs/core/src/tools/sandboxing.rs b/codex-rs/core/src/tools/sandboxing.rs
new file mode 100644
index 00000000000..ed142da46c9
--- /dev/null
+++ b/codex-rs/core/src/tools/sandboxing.rs
@@ -0,0 +1,190 @@
+//! Shared approvals and sandboxing traits used by tool runtimes.
+//!
+//!
Consolidates the approval flow primitives (`ApprovalDecision`, `ApprovalStore`, +//! `ApprovalCtx`, `Approvable`) together with the sandbox orchestration traits +//! and helpers (`Sandboxable`, `ToolRuntime`, `SandboxAttempt`, etc.). + +use crate::codex::Session; +use crate::error::CodexErr; +use crate::protocol::SandboxPolicy; +use crate::sandboxing::CommandSpec; +use crate::sandboxing::SandboxManager; +use crate::sandboxing::SandboxTransformError; +use crate::state::SessionServices; +use codex_protocol::protocol::AskForApproval; +use codex_protocol::protocol::ReviewDecision; +use std::collections::HashMap; +use std::fmt::Debug; +use std::hash::Hash; +use std::path::Path; + +use futures::Future; +use futures::future::BoxFuture; +use serde::Serialize; + +#[derive(Clone, Default, Debug)] +pub(crate) struct ApprovalStore { + // Store serialized keys for generic caching across requests. + map: HashMap, +} + +impl ApprovalStore { + pub fn get(&self, key: &K) -> Option + where + K: Serialize, + { + let s = serde_json::to_string(key).ok()?; + self.map.get(&s).cloned() + } + + pub fn put(&mut self, key: K, value: ReviewDecision) + where + K: Serialize, + { + if let Ok(s) = serde_json::to_string(&key) { + self.map.insert(s, value); + } + } +} + +pub(crate) async fn with_cached_approval( + services: &SessionServices, + key: K, + fetch: F, +) -> ReviewDecision +where + K: Serialize + Clone, + F: FnOnce() -> Fut, + Fut: Future, +{ + { + let store = services.tool_approvals.lock().await; + if let Some(decision) = store.get(&key) { + return decision; + } + } + + let decision = fetch().await; + + if matches!(decision, ReviewDecision::ApprovedForSession) { + let mut store = services.tool_approvals.lock().await; + store.put(key, ReviewDecision::ApprovedForSession); + } + + decision +} + +#[derive(Clone)] +pub(crate) struct ApprovalCtx<'a> { + pub session: &'a Session, + pub sub_id: &'a str, + pub call_id: &'a str, + pub retry_reason: Option, +} + +pub(crate) trait Approvable { + type ApprovalKey: Hash + Eq + Clone + Debug + Serialize; + + fn approval_key(&self, req: &Req) -> Self::ApprovalKey; + + /// Some tools may request to skip the sandbox on the first attempt + /// (e.g., when the request explicitly asks for escalated permissions). + /// Defaults to `false`. + fn wants_escalated_first_attempt(&self, _req: &Req) -> bool { + false + } + + fn should_bypass_approval(&self, policy: AskForApproval, already_approved: bool) -> bool { + if already_approved { + // We do not ask one more time + return true; + } + matches!(policy, AskForApproval::Never) + } + + /// Decide whether an initial user approval should be requested before the + /// first attempt. Defaults to the orchestrator's behavior (pre‑refactor): + /// - Never, OnFailure: do not ask + /// - OnRequest: ask unless sandbox policy is DangerFullAccess + /// - UnlessTrusted: always ask + fn wants_initial_approval( + &self, + _req: &Req, + policy: AskForApproval, + sandbox_policy: &SandboxPolicy, + ) -> bool { + match policy { + AskForApproval::Never | AskForApproval::OnFailure => false, + AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess), + AskForApproval::UnlessTrusted => true, + } + } + + fn start_approval_async<'a>( + &'a mut self, + req: &'a Req, + ctx: ApprovalCtx<'a>, + ) -> BoxFuture<'a, ReviewDecision>; +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) enum SandboxablePreference { + Auto, + #[allow(dead_code)] // Will be used by later tools. 
+ Require, + #[allow(dead_code)] // Will be used by later tools. + Forbid, +} + +pub(crate) trait Sandboxable { + fn sandbox_preference(&self) -> SandboxablePreference; + fn escalate_on_failure(&self) -> bool { + true + } +} + +pub(crate) struct ToolCtx<'a> { + pub session: &'a Session, + pub sub_id: String, + pub call_id: String, + pub tool_name: String, +} + +#[derive(Debug)] +pub(crate) enum ToolError { + Rejected(String), + SandboxDenied(String), + Codex(CodexErr), +} + +pub(crate) trait ToolRuntime: Approvable + Sandboxable { + async fn run( + &mut self, + req: &Req, + attempt: &SandboxAttempt<'_>, + ctx: &ToolCtx, + ) -> Result; +} + +pub(crate) struct SandboxAttempt<'a> { + pub sandbox: crate::exec::SandboxType, + pub policy: &'a crate::protocol::SandboxPolicy, + pub(crate) manager: &'a SandboxManager, + pub(crate) sandbox_cwd: &'a Path, + pub codex_linux_sandbox_exe: Option<&'a std::path::PathBuf>, +} + +impl<'a> SandboxAttempt<'a> { + pub fn env_for( + &self, + spec: &CommandSpec, + ) -> Result { + self.manager.transform( + spec, + self.policy, + self.sandbox, + self.sandbox_cwd, + self.codex_linux_sandbox_exe, + ) + } +} diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index 53520519176..c70da247ba1 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -815,12 +815,7 @@ pub(crate) fn build_specs( config: &ToolsConfig, mcp_tools: Option>, ) -> ToolRegistryBuilder { - use crate::exec_command::EXEC_COMMAND_TOOL_NAME; - use crate::exec_command::WRITE_STDIN_TOOL_NAME; - use crate::exec_command::create_exec_command_tool_for_responses_api; - use crate::exec_command::create_write_stdin_tool_for_responses_api; use crate::tools::handlers::ApplyPatchHandler; - use crate::tools::handlers::ExecStreamHandler; use crate::tools::handlers::GrepFilesHandler; use crate::tools::handlers::ListDirHandler; use crate::tools::handlers::McpHandler; @@ -836,7 +831,6 @@ pub(crate) fn build_specs( let mut builder = ToolRegistryBuilder::new(); let shell_handler = Arc::new(ShellHandler); - let exec_stream_handler = Arc::new(ExecStreamHandler); let unified_exec_handler = Arc::new(UnifiedExecHandler); let plan_handler = Arc::new(PlanHandler); let apply_patch_handler = Arc::new(ApplyPatchHandler); @@ -844,7 +838,10 @@ pub(crate) fn build_specs( let mcp_handler = Arc::new(McpHandler); let mcp_resource_handler = Arc::new(McpResourceHandler); - if config.experimental_unified_exec_tool { + let use_unified_exec = config.experimental_unified_exec_tool + || matches!(config.shell_type, ConfigShellToolType::Streamable); + + if use_unified_exec { builder.push_spec(create_unified_exec_tool()); builder.register_handler("unified_exec", unified_exec_handler); } else { @@ -856,14 +853,7 @@ pub(crate) fn build_specs( builder.push_spec(ToolSpec::LocalShell {}); } ConfigShellToolType::Streamable => { - builder.push_spec(ToolSpec::Function( - create_exec_command_tool_for_responses_api(), - )); - builder.push_spec(ToolSpec::Function( - create_write_stdin_tool_for_responses_api(), - )); - builder.register_handler(EXEC_COMMAND_TOOL_NAME, exec_stream_handler.clone()); - builder.register_handler(WRITE_STDIN_TOOL_NAME, exec_stream_handler); + // Already handled by use_unified_exec. 
} } } diff --git a/codex-rs/core/src/unified_exec/errors.rs b/codex-rs/core/src/unified_exec/errors.rs index 6bf5bf7ec57..bfbef93572b 100644 --- a/codex-rs/core/src/unified_exec/errors.rs +++ b/codex-rs/core/src/unified_exec/errors.rs @@ -1,22 +1,29 @@ +use crate::exec::ExecToolCallOutput; use thiserror::Error; #[derive(Debug, Error)] pub(crate) enum UnifiedExecError { - #[error("Failed to create unified exec session: {pty_error}")] - CreateSession { - #[source] - pty_error: anyhow::Error, - }, + #[error("Failed to create unified exec session: {message}")] + CreateSession { message: String }, #[error("Unknown session id {session_id}")] UnknownSessionId { session_id: i32 }, #[error("failed to write to stdin")] WriteToStdin, #[error("missing command line for unified exec request")] MissingCommandLine, + #[error("Command denied by sandbox: {message}")] + SandboxDenied { + message: String, + output: ExecToolCallOutput, + }, } impl UnifiedExecError { - pub(crate) fn create_session(error: anyhow::Error) -> Self { - Self::CreateSession { pty_error: error } + pub(crate) fn create_session(message: String) -> Self { + Self::CreateSession { message } + } + + pub(crate) fn sandbox_denied(message: String, output: ExecToolCallOutput) -> Self { + Self::SandboxDenied { message, output } } } diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index a8f754c92e9..fb791fe89ff 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -1,36 +1,55 @@ -use portable_pty::CommandBuilder; -use portable_pty::PtySize; -use portable_pty::native_pty_system; +//! Unified Exec: interactive PTY execution orchestrated with approvals + sandboxing. +//! +//! Responsibilities +//! - Manages interactive PTY sessions (create, reuse, buffer output with caps). +//! - Uses the shared ToolOrchestrator to handle approval, sandbox selection, and +//! retry semantics in a single, descriptive flow. +//! - Spawns the PTY from a sandbox‑transformed `ExecEnv`; on sandbox denial, +//! retries without sandbox when policy allows (no re‑prompt thanks to caching). +//! - Uses the shared `is_likely_sandbox_denied` heuristic to keep denial messages +//! consistent with other exec paths. +//! +//! Flow at a glance (open session) +//! 1) Build a small request `{ command, cwd }`. +//! 2) Orchestrator: approval (bypass/cache/prompt) → select sandbox → run. +//! 3) Runtime: transform `CommandSpec` → `ExecEnv` → spawn PTY. +//! 4) If denial, orchestrator retries with `SandboxType::None`. +//! 5) Session is returned with streaming output + metadata. +//! +//! This keeps policy logic and user interaction centralized while the PTY/session +//! concerns remain isolated here. The implementation is split between: +//! - `session.rs`: PTY session lifecycle + output buffering. +//! - `session_manager.rs`: orchestration (approvals, sandboxing, reuse) and request handling. 
+ use std::collections::HashMap; -use std::collections::VecDeque; -use std::io::ErrorKind; -use std::io::Read; -use std::sync::Arc; -use std::sync::Mutex as StdMutex; -use std::sync::atomic::AtomicBool; use std::sync::atomic::AtomicI32; -use std::sync::atomic::Ordering; + use tokio::sync::Mutex; -use tokio::sync::Notify; -use tokio::sync::mpsc; -use tokio::task::JoinHandle; -use tokio::time::Duration; -use tokio::time::Instant; -use crate::exec_command::ExecCommandSession; -use crate::truncate::truncate_middle; +use crate::codex::Session; +use crate::codex::TurnContext; mod errors; +mod session; +mod session_manager; pub(crate) use errors::UnifiedExecError; +pub(crate) use session::UnifiedExecSession; const DEFAULT_TIMEOUT_MS: u64 = 1_000; const MAX_TIMEOUT_MS: u64 = 60_000; const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 128 * 1024; // 128 KiB +pub(crate) struct UnifiedExecContext<'a> { + pub session: &'a Session, + pub turn: &'a TurnContext, + pub sub_id: &'a str, + pub call_id: &'a str, + pub session_id: Option, +} + #[derive(Debug)] pub(crate) struct UnifiedExecRequest<'a> { - pub session_id: Option, pub input_chunks: &'a [String], pub timeout_ms: Option, } @@ -44,380 +63,61 @@ pub(crate) struct UnifiedExecResult { #[derive(Debug, Default)] pub(crate) struct UnifiedExecSessionManager { next_session_id: AtomicI32, - sessions: Mutex>, -} - -#[derive(Debug)] -struct ManagedUnifiedExecSession { - session: ExecCommandSession, - output_buffer: OutputBuffer, - /// Notifies waiters whenever new output has been appended to - /// `output_buffer`, allowing clients to poll for fresh data. - output_notify: Arc, - output_task: JoinHandle<()>, -} - -#[derive(Debug, Default)] -struct OutputBufferState { - chunks: VecDeque>, - total_bytes: usize, + sessions: Mutex>, } -impl OutputBufferState { - fn push_chunk(&mut self, chunk: Vec) { - self.total_bytes = self.total_bytes.saturating_add(chunk.len()); - self.chunks.push_back(chunk); - - let mut excess = self - .total_bytes - .saturating_sub(UNIFIED_EXEC_OUTPUT_MAX_BYTES); - - while excess > 0 { - match self.chunks.front_mut() { - Some(front) if excess >= front.len() => { - excess -= front.len(); - self.total_bytes = self.total_bytes.saturating_sub(front.len()); - self.chunks.pop_front(); - } - Some(front) => { - front.drain(..excess); - self.total_bytes = self.total_bytes.saturating_sub(excess); - break; - } - None => break, - } - } - } - - fn drain(&mut self) -> Vec> { - let drained: Vec> = self.chunks.drain(..).collect(); - self.total_bytes = 0; - drained - } -} - -type OutputBuffer = Arc>; -type OutputHandles = (OutputBuffer, Arc); - -impl ManagedUnifiedExecSession { - fn new( - session: ExecCommandSession, - initial_output_rx: tokio::sync::broadcast::Receiver>, - ) -> Self { - let output_buffer = Arc::new(Mutex::new(OutputBufferState::default())); - let output_notify = Arc::new(Notify::new()); - let mut receiver = initial_output_rx; - let buffer_clone = Arc::clone(&output_buffer); - let notify_clone = Arc::clone(&output_notify); - let output_task = tokio::spawn(async move { - loop { - match receiver.recv().await { - Ok(chunk) => { - let mut guard = buffer_clone.lock().await; - guard.push_chunk(chunk); - drop(guard); - notify_clone.notify_waiters(); - } - // If we lag behind the broadcast buffer, skip missed - // messages but keep the task alive to continue streaming. - Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => { - continue; - } - // When the sender closes, exit the task. 
- Err(tokio::sync::broadcast::error::RecvError::Closed) => break, - } - } - }); - - Self { - session, - output_buffer, - output_notify, - output_task, - } - } - - fn writer_sender(&self) -> mpsc::Sender> { - self.session.writer_sender() - } +#[cfg(test)] +#[cfg(unix)] +mod tests { + use super::*; - fn output_handles(&self) -> OutputHandles { - ( - Arc::clone(&self.output_buffer), - Arc::clone(&self.output_notify), - ) - } + use crate::codex::Session; + use crate::codex::TurnContext; + use crate::codex::make_session_and_context; + use crate::protocol::AskForApproval; + use crate::protocol::SandboxPolicy; + use core_test_support::skip_if_sandbox; + use std::sync::Arc; + use tokio::time::Duration; - fn has_exited(&self) -> bool { - self.session.has_exited() - } -} + use super::session::OutputBufferState; -impl Drop for ManagedUnifiedExecSession { - fn drop(&mut self) { - self.output_task.abort(); + fn test_session_and_turn() -> (Arc, Arc) { + let (session, mut turn) = make_session_and_context(); + turn.approval_policy = AskForApproval::Never; + turn.sandbox_policy = SandboxPolicy::DangerFullAccess; + (Arc::new(session), Arc::new(turn)) } -} -impl UnifiedExecSessionManager { - pub async fn handle_request( - &self, - request: UnifiedExecRequest<'_>, + async fn run_unified_exec_request( + session: &Arc, + turn: &Arc, + session_id: Option, + input: Vec, + timeout_ms: Option, ) -> Result { - let (timeout_ms, timeout_warning) = match request.timeout_ms { - Some(requested) if requested > MAX_TIMEOUT_MS => ( - MAX_TIMEOUT_MS, - Some(format!( - "Warning: requested timeout {requested}ms exceeds maximum of {MAX_TIMEOUT_MS}ms; clamping to {MAX_TIMEOUT_MS}ms.\n" - )), - ), - Some(requested) => (requested, None), - None => (DEFAULT_TIMEOUT_MS, None), - }; - - let mut new_session: Option = None; - let session_id; - let writer_tx; - let output_buffer; - let output_notify; - - if let Some(existing_id) = request.session_id { - let mut sessions = self.sessions.lock().await; - match sessions.get(&existing_id) { - Some(session) => { - if session.has_exited() { - sessions.remove(&existing_id); - return Err(UnifiedExecError::UnknownSessionId { - session_id: existing_id, - }); - } - let (buffer, notify) = session.output_handles(); - session_id = existing_id; - writer_tx = session.writer_sender(); - output_buffer = buffer; - output_notify = notify; - } - None => { - return Err(UnifiedExecError::UnknownSessionId { - session_id: existing_id, - }); - } - } - drop(sessions); - } else { - let command = request.input_chunks.to_vec(); - let new_id = self.next_session_id.fetch_add(1, Ordering::SeqCst); - let (session, initial_output_rx) = create_unified_exec_session(&command).await?; - let managed_session = ManagedUnifiedExecSession::new(session, initial_output_rx); - let (buffer, notify) = managed_session.output_handles(); - writer_tx = managed_session.writer_sender(); - output_buffer = buffer; - output_notify = notify; - session_id = new_id; - new_session = Some(managed_session); - }; - - if request.session_id.is_some() { - let joined_input = request.input_chunks.join(" "); - if !joined_input.is_empty() && writer_tx.send(joined_input.into_bytes()).await.is_err() - { - return Err(UnifiedExecError::WriteToStdin); - } - } - - let mut collected: Vec = Vec::with_capacity(4096); - let start = Instant::now(); - let deadline = start + Duration::from_millis(timeout_ms); - - loop { - let drained_chunks; - let mut wait_for_output = None; - { - let mut guard = output_buffer.lock().await; - drained_chunks = guard.drain(); - if 
drained_chunks.is_empty() { - wait_for_output = Some(output_notify.notified()); - } - } - - if drained_chunks.is_empty() { - let remaining = deadline.saturating_duration_since(Instant::now()); - if remaining == Duration::ZERO { - break; - } - - let notified = wait_for_output.unwrap_or_else(|| output_notify.notified()); - tokio::pin!(notified); - tokio::select! { - _ = &mut notified => {} - _ = tokio::time::sleep(remaining) => break, - } - continue; - } - - for chunk in drained_chunks { - collected.extend_from_slice(&chunk); - } - - if Instant::now() >= deadline { - break; - } - } - - let (output, _maybe_tokens) = truncate_middle( - &String::from_utf8_lossy(&collected), - UNIFIED_EXEC_OUTPUT_MAX_BYTES, - ); - let output = if let Some(warning) = timeout_warning { - format!("{warning}{output}") - } else { - output + let request_input = input; + let request = UnifiedExecRequest { + input_chunks: &request_input, + timeout_ms, }; - let should_store_session = if let Some(session) = new_session.as_ref() { - !session.has_exited() - } else if request.session_id.is_some() { - let mut sessions = self.sessions.lock().await; - if let Some(existing) = sessions.get(&session_id) { - if existing.has_exited() { - sessions.remove(&session_id); - false - } else { - true - } - } else { - false - } - } else { - true - }; - - if should_store_session { - if let Some(session) = new_session { - self.sessions.lock().await.insert(session_id, session); - } - Ok(UnifiedExecResult { - session_id: Some(session_id), - output, - }) - } else { - Ok(UnifiedExecResult { - session_id: None, - output, - }) - } - } -} - -async fn create_unified_exec_session( - command: &[String], -) -> Result< - ( - ExecCommandSession, - tokio::sync::broadcast::Receiver>, - ), - UnifiedExecError, -> { - if command.is_empty() { - return Err(UnifiedExecError::MissingCommandLine); - } - - let pty_system = native_pty_system(); - - let pair = pty_system - .openpty(PtySize { - rows: 24, - cols: 80, - pixel_width: 0, - pixel_height: 0, - }) - .map_err(UnifiedExecError::create_session)?; - - // Safe thanks to the check at the top of the function. - let mut command_builder = CommandBuilder::new(command[0].clone()); - for arg in &command[1..] 
{ - command_builder.arg(arg); + session + .services + .unified_exec_manager + .handle_request( + request, + UnifiedExecContext { + session, + turn: turn.as_ref(), + sub_id: "sub", + call_id: "call", + session_id, + }, + ) + .await } - let mut child = pair - .slave - .spawn_command(command_builder) - .map_err(UnifiedExecError::create_session)?; - let killer = child.clone_killer(); - - let (writer_tx, mut writer_rx) = mpsc::channel::>(128); - let (output_tx, _) = tokio::sync::broadcast::channel::>(256); - - let mut reader = pair - .master - .try_clone_reader() - .map_err(UnifiedExecError::create_session)?; - let output_tx_clone = output_tx.clone(); - let reader_handle = tokio::task::spawn_blocking(move || { - let mut buf = [0u8; 8192]; - loop { - match reader.read(&mut buf) { - Ok(0) => break, - Ok(n) => { - let _ = output_tx_clone.send(buf[..n].to_vec()); - } - Err(ref e) if e.kind() == ErrorKind::Interrupted => continue, - Err(ref e) if e.kind() == ErrorKind::WouldBlock => { - std::thread::sleep(Duration::from_millis(5)); - continue; - } - Err(_) => break, - } - } - }); - - let writer = pair - .master - .take_writer() - .map_err(UnifiedExecError::create_session)?; - let writer = Arc::new(StdMutex::new(writer)); - let writer_handle = tokio::spawn({ - let writer = writer.clone(); - async move { - while let Some(bytes) = writer_rx.recv().await { - let writer = writer.clone(); - let _ = tokio::task::spawn_blocking(move || { - if let Ok(mut guard) = writer.lock() { - use std::io::Write; - let _ = guard.write_all(&bytes); - let _ = guard.flush(); - } - }) - .await; - } - } - }); - - let exit_status = Arc::new(AtomicBool::new(false)); - let wait_exit_status = Arc::clone(&exit_status); - let wait_handle = tokio::task::spawn_blocking(move || { - let _ = child.wait(); - wait_exit_status.store(true, Ordering::SeqCst); - }); - - let (session, initial_output_rx) = ExecCommandSession::new( - writer_tx, - output_tx, - killer, - reader_handle, - writer_handle, - wait_handle, - exit_status, - ); - Ok((session, initial_output_rx)) -} - -#[cfg(test)] -mod tests { - use super::*; - #[cfg(unix)] - use core_test_support::skip_if_sandbox; - #[test] fn push_chunk_trims_only_excess_bytes() { let mut buffer = OutputBufferState::default(); @@ -426,167 +126,170 @@ mod tests { buffer.push_chunk(vec![b'c']); assert_eq!(buffer.total_bytes, UNIFIED_EXEC_OUTPUT_MAX_BYTES); - assert_eq!(buffer.chunks.len(), 3); + let snapshot = buffer.snapshot(); + assert_eq!(snapshot.len(), 3); assert_eq!( - buffer.chunks.front().unwrap().len(), + snapshot.first().unwrap().len(), UNIFIED_EXEC_OUTPUT_MAX_BYTES - 2 ); - assert_eq!(buffer.chunks.pop_back().unwrap(), vec![b'c']); - assert_eq!(buffer.chunks.pop_back().unwrap(), vec![b'b']); + assert_eq!(snapshot.get(2).unwrap(), &vec![b'c']); + assert_eq!(snapshot.get(1).unwrap(), &vec![b'b']); } - #[cfg(unix)] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn unified_exec_persists_across_requests_jif() -> Result<(), UnifiedExecError> { + async fn unified_exec_persists_across_requests() -> anyhow::Result<()> { skip_if_sandbox!(Ok(())); - let manager = UnifiedExecSessionManager::default(); + let (session, turn) = test_session_and_turn(); - let open_shell = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &["bash".to_string(), "-i".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + let open_shell = run_unified_exec_request( + &session, + &turn, + None, + vec!["bash".to_string(), "-i".to_string()], + Some(2_500), + ) + .await?; let 
session_id = open_shell.session_id.expect("expected session_id"); - manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &[ - "export".to_string(), - "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(), - ], - timeout_ms: Some(2_500), - }) - .await?; - - let out_2 = manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + run_unified_exec_request( + &session, + &turn, + Some(session_id), + vec![ + "export".to_string(), + "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(), + ], + Some(2_500), + ) + .await?; + + let out_2 = run_unified_exec_request( + &session, + &turn, + Some(session_id), + vec!["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()], + Some(2_500), + ) + .await?; assert!(out_2.output.contains("codex")); Ok(()) } - #[cfg(unix)] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn multi_unified_exec_sessions() -> Result<(), UnifiedExecError> { + async fn multi_unified_exec_sessions() -> anyhow::Result<()> { skip_if_sandbox!(Ok(())); - let manager = UnifiedExecSessionManager::default(); + let (session, turn) = test_session_and_turn(); - let shell_a = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &["/bin/bash".to_string(), "-i".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + let shell_a = run_unified_exec_request( + &session, + &turn, + None, + vec!["/bin/bash".to_string(), "-i".to_string()], + Some(2_500), + ) + .await?; let session_a = shell_a.session_id.expect("expected session id"); - manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_a), - input_chunks: &["export CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string()], - timeout_ms: Some(2_500), - }) - .await?; - - let out_2 = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &[ - "echo".to_string(), - "$CODEX_INTERACTIVE_SHELL_VAR\n".to_string(), - ], - timeout_ms: Some(2_500), - }) - .await?; + run_unified_exec_request( + &session, + &turn, + Some(session_a), + vec!["export CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string()], + Some(2_500), + ) + .await?; + + let out_2 = run_unified_exec_request( + &session, + &turn, + None, + vec![ + "echo".to_string(), + "$CODEX_INTERACTIVE_SHELL_VAR\n".to_string(), + ], + Some(2_500), + ) + .await?; assert!(!out_2.output.contains("codex")); - let out_3 = manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_a), - input_chunks: &["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + let out_3 = run_unified_exec_request( + &session, + &turn, + Some(session_a), + vec!["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()], + Some(2_500), + ) + .await?; assert!(out_3.output.contains("codex")); Ok(()) } - #[cfg(unix)] #[tokio::test] - async fn unified_exec_timeouts() -> Result<(), UnifiedExecError> { + async fn unified_exec_timeouts() -> anyhow::Result<()> { skip_if_sandbox!(Ok(())); - let manager = UnifiedExecSessionManager::default(); + let (session, turn) = test_session_and_turn(); - let open_shell = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &["bash".to_string(), "-i".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + let open_shell = run_unified_exec_request( + &session, + &turn, + None, + vec!["bash".to_string(), "-i".to_string()], + Some(2_500), + ) + .await?; let session_id = 
open_shell.session_id.expect("expected session id"); - manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &[ - "export".to_string(), - "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(), - ], - timeout_ms: Some(2_500), - }) - .await?; - - let out_2 = manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &["sleep 5 && echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()], - timeout_ms: Some(10), - }) - .await?; + run_unified_exec_request( + &session, + &turn, + Some(session_id), + vec![ + "export".to_string(), + "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(), + ], + Some(2_500), + ) + .await?; + + let out_2 = run_unified_exec_request( + &session, + &turn, + Some(session_id), + vec!["sleep 5 && echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()], + Some(10), + ) + .await?; assert!(!out_2.output.contains("codex")); tokio::time::sleep(Duration::from_secs(7)).await; - let empty = Vec::new(); - let out_3 = manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &empty, - timeout_ms: Some(100), - }) - .await?; + let out_3 = + run_unified_exec_request(&session, &turn, Some(session_id), Vec::new(), Some(100)) + .await?; assert!(out_3.output.contains("codex")); Ok(()) } - #[cfg(unix)] #[tokio::test] #[ignore] // Ignored while we have a better way to test this. - async fn requests_with_large_timeout_are_capped() -> Result<(), UnifiedExecError> { - let manager = UnifiedExecSessionManager::default(); - - let result = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &["echo".to_string(), "codex".to_string()], - timeout_ms: Some(120_000), - }) - .await?; + async fn requests_with_large_timeout_are_capped() -> anyhow::Result<()> { + let (session, turn) = test_session_and_turn(); + + let result = run_unified_exec_request( + &session, + &turn, + None, + vec!["echo".to_string(), "codex".to_string()], + Some(120_000), + ) + .await?; assert!(result.output.starts_with( "Warning: requested timeout 120000ms exceeds maximum of 60000ms; clamping to 60000ms.\n" @@ -596,61 +299,66 @@ mod tests { Ok(()) } - #[cfg(unix)] #[tokio::test] #[ignore] // Ignored while we have a better way to test this. 
- async fn completed_commands_do_not_persist_sessions() -> Result<(), UnifiedExecError> { - let manager = UnifiedExecSessionManager::default(); - let result = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &["/bin/echo".to_string(), "codex".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + async fn completed_commands_do_not_persist_sessions() -> anyhow::Result<()> { + let (session, turn) = test_session_and_turn(); + let result = run_unified_exec_request( + &session, + &turn, + None, + vec!["/bin/echo".to_string(), "codex".to_string()], + Some(2_500), + ) + .await?; assert!(result.session_id.is_none()); assert!(result.output.contains("codex")); - assert!(manager.sessions.lock().await.is_empty()); + assert!( + session + .services + .unified_exec_manager + .sessions + .lock() + .await + .is_empty() + ); Ok(()) } - #[cfg(unix)] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn reusing_completed_session_returns_unknown_session() -> Result<(), UnifiedExecError> { + async fn reusing_completed_session_returns_unknown_session() -> anyhow::Result<()> { skip_if_sandbox!(Ok(())); - let manager = UnifiedExecSessionManager::default(); + let (session, turn) = test_session_and_turn(); - let open_shell = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &["/bin/bash".to_string(), "-i".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + let open_shell = run_unified_exec_request( + &session, + &turn, + None, + vec!["/bin/bash".to_string(), "-i".to_string()], + Some(2_500), + ) + .await?; let session_id = open_shell.session_id.expect("expected session id"); - manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &["exit\n".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + run_unified_exec_request( + &session, + &turn, + Some(session_id), + vec!["exit\n".to_string()], + Some(2_500), + ) + .await?; tokio::time::sleep(Duration::from_millis(200)).await; - let err = manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &[], - timeout_ms: Some(100), - }) - .await - .expect_err("expected unknown session error"); + let err = + run_unified_exec_request(&session, &turn, Some(session_id), Vec::new(), Some(100)) + .await + .expect_err("expected unknown session error"); match err { UnifiedExecError::UnknownSessionId { session_id: err_id } => { @@ -659,7 +367,15 @@ mod tests { other => panic!("expected UnknownSessionId, got {other:?}"), } - assert!(!manager.sessions.lock().await.contains_key(&session_id)); + assert!( + !session + .services + .unified_exec_manager + .sessions + .lock() + .await + .contains_key(&session_id) + ); Ok(()) } diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs new file mode 100644 index 00000000000..bdb935f171c --- /dev/null +++ b/codex-rs/core/src/unified_exec/session.rs @@ -0,0 +1,217 @@ +#![allow(clippy::module_inception)] + +use std::collections::VecDeque; +use std::sync::Arc; + +use tokio::sync::Mutex; +use tokio::sync::Notify; +use tokio::sync::mpsc; +use tokio::sync::oneshot::error::TryRecvError; +use tokio::task::JoinHandle; +use tokio::time::Duration; + +use crate::exec::ExecToolCallOutput; +use crate::exec::SandboxType; +use crate::exec::StreamOutput; +use crate::exec::is_likely_sandbox_denied; +use crate::truncate::truncate_middle; +use codex_utils_pty::ExecCommandSession; +use codex_utils_pty::SpawnedPty; + +use 
super::UNIFIED_EXEC_OUTPUT_MAX_BYTES;
+use super::UnifiedExecError;
+
+#[derive(Debug, Default)]
+pub(crate) struct OutputBufferState {
+    chunks: VecDeque<Vec<u8>>,
+    pub(crate) total_bytes: usize,
+}
+
+impl OutputBufferState {
+    pub(super) fn push_chunk(&mut self, chunk: Vec<u8>) {
+        self.total_bytes = self.total_bytes.saturating_add(chunk.len());
+        self.chunks.push_back(chunk);
+
+        let mut excess = self
+            .total_bytes
+            .saturating_sub(UNIFIED_EXEC_OUTPUT_MAX_BYTES);
+
+        while excess > 0 {
+            match self.chunks.front_mut() {
+                Some(front) if excess >= front.len() => {
+                    excess -= front.len();
+                    self.total_bytes = self.total_bytes.saturating_sub(front.len());
+                    self.chunks.pop_front();
+                }
+                Some(front) => {
+                    front.drain(..excess);
+                    self.total_bytes = self.total_bytes.saturating_sub(excess);
+                    break;
+                }
+                None => break,
+            }
+        }
+    }
+
+    pub(super) fn drain(&mut self) -> Vec<Vec<u8>> {
+        let drained: Vec<Vec<u8>> = self.chunks.drain(..).collect();
+        self.total_bytes = 0;
+        drained
+    }
+
+    pub(super) fn snapshot(&self) -> Vec<Vec<u8>> {
+        self.chunks.iter().cloned().collect()
+    }
+}
+
+pub(crate) type OutputBuffer = Arc<Mutex<OutputBufferState>>;
+pub(crate) type OutputHandles = (OutputBuffer, Arc<Notify>);
+
+#[derive(Debug)]
+pub(crate) struct UnifiedExecSession {
+    session: ExecCommandSession,
+    output_buffer: OutputBuffer,
+    output_notify: Arc<Notify>,
+    output_task: JoinHandle<()>,
+    sandbox_type: SandboxType,
+}
+
+impl UnifiedExecSession {
+    pub(super) fn new(
+        session: ExecCommandSession,
+        initial_output_rx: tokio::sync::broadcast::Receiver<Vec<u8>>,
+        sandbox_type: SandboxType,
+    ) -> Self {
+        let output_buffer = Arc::new(Mutex::new(OutputBufferState::default()));
+        let output_notify = Arc::new(Notify::new());
+        let mut receiver = initial_output_rx;
+        let buffer_clone = Arc::clone(&output_buffer);
+        let notify_clone = Arc::clone(&output_notify);
+        let output_task = tokio::spawn(async move {
+            loop {
+                match receiver.recv().await {
+                    Ok(chunk) => {
+                        let mut guard = buffer_clone.lock().await;
+                        guard.push_chunk(chunk);
+                        drop(guard);
+                        notify_clone.notify_waiters();
+                    }
+                    Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
+                    Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
+                }
+            }
+        });
+
+        Self {
+            session,
+            output_buffer,
+            output_notify,
+            output_task,
+            sandbox_type,
+        }
+    }
+
+    pub(super) fn writer_sender(&self) -> mpsc::Sender<Vec<u8>> {
+        self.session.writer_sender()
+    }
+
+    pub(super) fn output_handles(&self) -> OutputHandles {
+        (
+            Arc::clone(&self.output_buffer),
+            Arc::clone(&self.output_notify),
+        )
+    }
+
+    pub(super) fn has_exited(&self) -> bool {
+        self.session.has_exited()
+    }
+
+    pub(super) fn exit_code(&self) -> Option<i32> {
+        self.session.exit_code()
+    }
+
+    async fn snapshot_output(&self) -> Vec<Vec<u8>> {
+        let guard = self.output_buffer.lock().await;
+        guard.snapshot()
+    }
+
+    fn sandbox_type(&self) -> SandboxType {
+        self.sandbox_type
+    }
+
+    pub(super) async fn check_for_sandbox_denial(&self) -> Result<(), UnifiedExecError> {
+        if self.sandbox_type() == SandboxType::None || !self.has_exited() {
+            return Ok(());
+        }
+
+        let _ =
+            tokio::time::timeout(Duration::from_millis(20), self.output_notify.notified()).await;
+
+        let collected_chunks = self.snapshot_output().await;
+        let mut aggregated: Vec<u8> = Vec::new();
+        for chunk in collected_chunks {
+            aggregated.extend_from_slice(&chunk);
+        }
+        let aggregated_text = String::from_utf8_lossy(&aggregated).to_string();
+        let exit_code = self.exit_code().unwrap_or(-1);
+
+        let exec_output = ExecToolCallOutput {
+            exit_code,
+            stdout: StreamOutput::new(aggregated_text.clone()),
+            stderr:
+            aggregated_output: StreamOutput::new(aggregated_text.clone()),
+            duration: Duration::ZERO,
+            timed_out: false,
+        };
+
+        if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) {
+            let (snippet, _) = truncate_middle(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_BYTES);
+            let message = if snippet.is_empty() {
+                format!("exit code {exit_code}")
+            } else {
+                snippet
+            };
+            return Err(UnifiedExecError::sandbox_denied(message, exec_output));
+        }
+
+        Ok(())
+    }
+
+    pub(super) async fn from_spawned(
+        spawned: SpawnedPty,
+        sandbox_type: SandboxType,
+    ) -> Result<Self, UnifiedExecError> {
+        let SpawnedPty {
+            session,
+            output_rx,
+            mut exit_rx,
+        } = spawned;
+        let managed = Self::new(session, output_rx, sandbox_type);
+
+        let exit_ready = match exit_rx.try_recv() {
+            Ok(_) | Err(TryRecvError::Closed) => true,
+            Err(TryRecvError::Empty) => false,
+        };
+
+        if exit_ready {
+            managed.check_for_sandbox_denial().await?;
+            return Ok(managed);
+        }
+
+        tokio::pin!(exit_rx);
+        if tokio::time::timeout(Duration::from_millis(50), &mut exit_rx)
+            .await
+            .is_ok()
+        {
+            managed.check_for_sandbox_denial().await?;
+        }
+
+        Ok(managed)
+    }
+}
+
+impl Drop for UnifiedExecSession {
+    fn drop(&mut self) {
+        self.output_task.abort();
+    }
+}
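The OutputBufferState above caps retained PTY output at UNIFIED_EXEC_OUTPUT_MAX_BYTES by discarding the oldest bytes first: whole chunks are dropped from the front, and a partially overflowing front chunk is shortened. A minimal standalone sketch of that trimming rule (not part of the diff; the BoundedBuffer type and the 8-byte CAP are invented so the effect is easy to see):

// Standalone sketch: same oldest-bytes-first trimming rule as
// OutputBufferState::push_chunk, with a hypothetical 8-byte cap.
use std::collections::VecDeque;

const CAP: usize = 8;

#[derive(Default)]
struct BoundedBuffer {
    chunks: VecDeque<Vec<u8>>,
    total_bytes: usize,
}

impl BoundedBuffer {
    fn push_chunk(&mut self, chunk: Vec<u8>) {
        self.total_bytes += chunk.len();
        self.chunks.push_back(chunk);

        // Drop or shorten the oldest chunks until we are back under the cap.
        let mut excess = self.total_bytes.saturating_sub(CAP);
        while excess > 0 {
            match self.chunks.front_mut() {
                Some(front) if excess >= front.len() => {
                    excess -= front.len();
                    self.total_bytes -= front.len();
                    self.chunks.pop_front();
                }
                Some(front) => {
                    front.drain(..excess);
                    self.total_bytes -= excess;
                    break;
                }
                None => break,
            }
        }
    }
}

fn main() {
    let mut buf = BoundedBuffer::default();
    buf.push_chunk(b"hello ".to_vec()); // 6 bytes, under the cap
    buf.push_chunk(b"world!".to_vec()); // 12 bytes total -> trim 4 from the front
    let joined: Vec<u8> = buf.chunks.iter().flatten().copied().collect();
    assert_eq!(joined, b"o world!".to_vec()); // newest bytes win, oldest are discarded
    assert_eq!(buf.total_bytes, 8);
}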
diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs
new file mode 100644
index 00000000000..8bc8cb293e2
--- /dev/null
+++ b/codex-rs/core/src/unified_exec/session_manager.rs
@@ -0,0 +1,288 @@
+use std::sync::Arc;
+
+use tokio::sync::Notify;
+use tokio::sync::mpsc;
+use tokio::time::Duration;
+use tokio::time::Instant;
+
+use crate::exec_env::create_env;
+use crate::sandboxing::ExecEnv;
+use crate::tools::orchestrator::ToolOrchestrator;
+use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
+use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
+use crate::tools::sandboxing::ToolCtx;
+use crate::truncate::truncate_middle;
+
+use super::DEFAULT_TIMEOUT_MS;
+use super::MAX_TIMEOUT_MS;
+use super::UNIFIED_EXEC_OUTPUT_MAX_BYTES;
+use super::UnifiedExecContext;
+use super::UnifiedExecError;
+use super::UnifiedExecRequest;
+use super::UnifiedExecResult;
+use super::UnifiedExecSessionManager;
+use super::session::OutputBuffer;
+use super::session::UnifiedExecSession;
+
+pub(super) struct SessionAcquisition {
+    pub(super) session_id: i32,
+    pub(super) writer_tx: mpsc::Sender<Vec<u8>>,
+    pub(super) output_buffer: OutputBuffer,
+    pub(super) output_notify: Arc<Notify>,
+    pub(super) new_session: Option<UnifiedExecSession>,
+    pub(super) reuse_requested: bool,
+}
+
+impl UnifiedExecSessionManager {
+    pub(super) async fn acquire_session(
+        &self,
+        request: &UnifiedExecRequest<'_>,
+        context: &UnifiedExecContext<'_>,
+    ) -> Result<SessionAcquisition, UnifiedExecError> {
+        if let Some(existing_id) = context.session_id {
+            let mut sessions = self.sessions.lock().await;
+            match sessions.get(&existing_id) {
+                Some(session) => {
+                    if session.has_exited() {
+                        sessions.remove(&existing_id);
+                        return Err(UnifiedExecError::UnknownSessionId {
+                            session_id: existing_id,
+                        });
+                    }
+                    let (buffer, notify) = session.output_handles();
+                    let writer_tx = session.writer_sender();
+                    Ok(SessionAcquisition {
+                        session_id: existing_id,
+                        writer_tx,
+                        output_buffer: buffer,
+                        output_notify: notify,
+                        new_session: None,
+                        reuse_requested: true,
+                    })
+                }
+                None => Err(UnifiedExecError::UnknownSessionId {
+                    session_id: existing_id,
+                }),
+            }
+        } else {
+            let new_id = self
+                .next_session_id
+                .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
+            let managed_session = self
+                .open_session_with_sandbox(request.input_chunks.to_vec(), context)
+                .await?;
+            let (buffer, notify) = managed_session.output_handles();
+            let writer_tx = managed_session.writer_sender();
+            Ok(SessionAcquisition {
+                session_id: new_id,
+                writer_tx,
+                output_buffer: buffer,
+                output_notify: notify,
+                new_session: Some(managed_session),
+                reuse_requested: false,
+            })
+        }
+    }
+
+    pub(crate) async fn open_session_with_exec_env(
+        &self,
+        env: &ExecEnv,
+    ) -> Result<UnifiedExecSession, UnifiedExecError> {
+        let (program, args) = env
+            .command
+            .split_first()
+            .ok_or(UnifiedExecError::MissingCommandLine)?;
+        let spawned =
+            codex_utils_pty::spawn_pty_process(program, args, env.cwd.as_path(), &env.env)
+                .await
+                .map_err(|err| UnifiedExecError::create_session(err.to_string()))?;
+        UnifiedExecSession::from_spawned(spawned, env.sandbox).await
+    }
+
+    pub(super) async fn open_session_with_sandbox(
+        &self,
+        command: Vec<String>,
+        context: &UnifiedExecContext<'_>,
+    ) -> Result<UnifiedExecSession, UnifiedExecError> {
+        let mut orchestrator = ToolOrchestrator::new();
+        let mut runtime = UnifiedExecRuntime::new(self);
+        let req = UnifiedExecToolRequest::new(
+            command,
+            context.turn.cwd.clone(),
+            create_env(&context.turn.shell_environment_policy),
+        );
+        let tool_ctx = ToolCtx {
+            session: context.session,
+            sub_id: context.sub_id.to_string(),
+            call_id: context.call_id.to_string(),
+            tool_name: "unified_exec".to_string(),
+        };
+        orchestrator
+            .run(
+                &mut runtime,
+                &req,
+                &tool_ctx,
+                context.turn,
+                context.turn.approval_policy,
+            )
+            .await
+            .map_err(|e| UnifiedExecError::create_session(format!("{e:?}")))
+    }
+
+    pub(super) async fn collect_output_until_deadline(
+        output_buffer: &OutputBuffer,
+        output_notify: &Arc<Notify>,
+        deadline: Instant,
+    ) -> Vec<u8> {
+        let mut collected: Vec<u8> = Vec::with_capacity(4096);
+        loop {
+            let drained_chunks;
+            let mut wait_for_output = None;
+            {
+                let mut guard = output_buffer.lock().await;
+                drained_chunks = guard.drain();
+                if drained_chunks.is_empty() {
+                    wait_for_output = Some(output_notify.notified());
+                }
+            }
+
+            if drained_chunks.is_empty() {
+                let remaining = deadline.saturating_duration_since(Instant::now());
+                if remaining == Duration::ZERO {
+                    break;
+                }
+
+                let notified = wait_for_output.unwrap_or_else(|| output_notify.notified());
+                tokio::pin!(notified);
+                tokio::select! {
+                    _ = &mut notified => {}
+                    _ = tokio::time::sleep(remaining) => break,
+                }
+                continue;
+            }
+
+            for chunk in drained_chunks {
+                collected.extend_from_slice(&chunk);
+            }
+
+            if Instant::now() >= deadline {
+                break;
+            }
+        }
+
+        collected
+    }
+
+    pub(super) async fn should_store_session(&self, acquisition: &SessionAcquisition) -> bool {
+        if let Some(session) = acquisition.new_session.as_ref() {
+            !session.has_exited()
+        } else if acquisition.reuse_requested {
+            let mut sessions = self.sessions.lock().await;
+            if let Some(existing) = sessions.get(&acquisition.session_id) {
+                if existing.has_exited() {
+                    sessions.remove(&acquisition.session_id);
+                    false
+                } else {
+                    true
+                }
+            } else {
+                false
+            }
+        } else {
+            true
+        }
+    }
+
+    pub(super) async fn send_input_chunks(
+        writer_tx: &mpsc::Sender<Vec<u8>>,
+        chunks: &[String],
+    ) -> Result<(), UnifiedExecError> {
+        let mut trailing_whitespace = true;
+        for chunk in chunks {
+            if chunk.is_empty() {
+                continue;
+            }
+
+            let leading_whitespace = chunk
+                .chars()
+                .next()
+                .map(char::is_whitespace)
+                .unwrap_or(true);
+
+            if !trailing_whitespace
+                && !leading_whitespace
+                && writer_tx.send(vec![b' ']).await.is_err()
+            {
+                return Err(UnifiedExecError::WriteToStdin);
+            }
+
+            if writer_tx.send(chunk.as_bytes().to_vec()).await.is_err() {
+                return Err(UnifiedExecError::WriteToStdin);
+            }
+
+            trailing_whitespace = chunk
+                .chars()
+                .next_back()
+                .map(char::is_whitespace)
+                .unwrap_or(trailing_whitespace);
+        }
+
+        Ok(())
+    }
+
+    pub async fn handle_request(
+        &self,
+        request: UnifiedExecRequest<'_>,
+        context: UnifiedExecContext<'_>,
+    ) -> Result<UnifiedExecResult, UnifiedExecError> {
+        let (timeout_ms, timeout_warning) = match request.timeout_ms {
+            Some(requested) if requested > MAX_TIMEOUT_MS => (
+                MAX_TIMEOUT_MS,
+                Some(format!(
+                    "Warning: requested timeout {requested}ms exceeds maximum of {MAX_TIMEOUT_MS}ms; clamping to {MAX_TIMEOUT_MS}ms.\n"
+                )),
+            ),
+            Some(requested) => (requested, None),
+            None => (DEFAULT_TIMEOUT_MS, None),
+        };
+
+        let mut acquisition = self.acquire_session(&request, &context).await?;
+
+        if acquisition.reuse_requested {
+            Self::send_input_chunks(&acquisition.writer_tx, request.input_chunks).await?;
+        }
+
+        let deadline = Instant::now() + Duration::from_millis(timeout_ms);
+        let collected = Self::collect_output_until_deadline(
+            &acquisition.output_buffer,
+            &acquisition.output_notify,
+            deadline,
+        )
+        .await;
+
+        let (output, _maybe_tokens) = truncate_middle(
+            &String::from_utf8_lossy(&collected),
+            UNIFIED_EXEC_OUTPUT_MAX_BYTES,
+        );
+        let output = if let Some(warning) = timeout_warning {
+            format!("{warning}{output}")
+        } else {
+            output
+        };
+
+        let should_store_session = self.should_store_session(&acquisition).await;
+        let session_id = if should_store_session {
+            if let Some(session) = acquisition.new_session.take() {
+                self.sessions
+                    .lock()
+                    .await
+                    .insert(acquisition.session_id, session);
+            }
+            Some(acquisition.session_id)
+        } else {
+            None
+        };
+
+        Ok(UnifiedExecResult { session_id, output })
+    }
+}
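send_input_chunks above joins the caller-supplied input chunks with at most one separating space: a space byte is written only when the previous chunk did not end with whitespace and the next one does not start with it. A minimal standalone sketch of the same rule as a pure function (not part of the diff; join_input_chunks is an invented name), which makes the spacing behavior easy to check without a PTY:

// Standalone sketch: the chunk-joining rule used by send_input_chunks,
// modeled as a pure function over bytes.
fn join_input_chunks(chunks: &[String]) -> Vec<u8> {
    let mut out = Vec::new();
    // Start as if the previous output ended in whitespace so no leading space is added.
    let mut trailing_whitespace = true;
    for chunk in chunks {
        if chunk.is_empty() {
            continue;
        }
        let leading_whitespace = chunk.chars().next().map(char::is_whitespace).unwrap_or(true);
        // Insert a single separating space only when neither side already has one.
        if !trailing_whitespace && !leading_whitespace {
            out.push(b' ');
        }
        out.extend_from_slice(chunk.as_bytes());
        trailing_whitespace = chunk
            .chars()
            .next_back()
            .map(char::is_whitespace)
            .unwrap_or(trailing_whitespace);
    }
    out
}

fn main() {
    let chunks = vec!["echo".to_string(), "codex".to_string(), "\n".to_string()];
    // "echo" and "codex" get a space inserted between them; "\n" already starts
    // with whitespace, so nothing extra is added before it.
    assert_eq!(join_input_chunks(&chunks), b"echo codex\n".to_vec());
}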
diff --git a/codex-rs/core/tests/suite/exec.rs b/codex-rs/core/tests/suite/exec.rs
index 2d1e99b0b73..ea5ab848797 100644
--- a/codex-rs/core/tests/suite/exec.rs
+++ b/codex-rs/core/tests/suite/exec.rs
@@ -36,6 +36,7 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result
+        arg0: None,
diff --git a/codex-rs/core/tests/suite/exec_stream_events.rs b/codex-rs/core/tests/suite/exec_stream_events.rs
deleted file mode 100644
--- a/codex-rs/core/tests/suite/exec_stream_events.rs
+++ /dev/null
-fn collect_stdout_events(rx: Receiver<Event>) -> Vec<u8> {
-    let mut out = Vec::new();
-    while let Ok(ev) = rx.try_recv() {
-        if let EventMsg::ExecCommandOutputDelta(ExecCommandOutputDeltaEvent {
-            stream: ExecOutputStream::Stdout,
-            chunk,
-            ..
-        }) = ev.msg
-        {
-            out.extend_from_slice(&chunk);
-        }
-    }
-    out
-}
-
-#[tokio::test]
-async fn test_exec_stdout_stream_events_echo() {
-    let (tx, rx) = async_channel::unbounded::<Event>();
-
-    let stdout_stream = StdoutStream {
-        sub_id: "test-sub".to_string(),
-        call_id: "call-1".to_string(),
-        tx_event: tx,
-    };
-
-    let cmd = vec![
-        "/bin/sh".to_string(),
-        "-c".to_string(),
-        // Use printf for predictable behavior across shells
-        "printf 'hello-world\n'".to_string(),
-    ];
-
-    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
-    let params = ExecParams {
-        command: cmd,
-        cwd: cwd.clone(),
-        timeout_ms: Some(5_000),
-        env: HashMap::new(),
-        with_escalated_permissions: None,
-        justification: None,
-    };
-
-    let policy = SandboxPolicy::new_read_only_policy();
-
-    let result = process_exec_tool_call(
-        params,
-        SandboxType::None,
-        &policy,
-        cwd.as_path(),
-        &None,
-        Some(stdout_stream),
-    )
-    .await;
-
-    let result = match result {
-        Ok(r) => r,
-        Err(e) => panic!("process_exec_tool_call failed: {e}"),
-    };
-
-    assert_eq!(result.exit_code, 0);
-    assert_eq!(result.stdout.text, "hello-world\n");
-
-    let streamed = collect_stdout_events(rx);
-    // We should have received at least the same contents (possibly in one chunk)
-    assert_eq!(String::from_utf8_lossy(&streamed), "hello-world\n");
-}
-
-#[tokio::test]
-async fn test_exec_stderr_stream_events_echo() {
-    let (tx, rx) = async_channel::unbounded::<Event>();
-
-    let stdout_stream = StdoutStream {
-        sub_id: "test-sub".to_string(),
-        call_id: "call-2".to_string(),
-        tx_event: tx,
-    };
-
-    let cmd = vec![
-        "/bin/sh".to_string(),
-        "-c".to_string(),
-        // Write to stderr explicitly
-        "printf 'oops\n' 1>&2".to_string(),
-    ];
-
-    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
-    let params = ExecParams {
-        command: cmd,
-        cwd: cwd.clone(),
-        timeout_ms: Some(5_000),
-        env: HashMap::new(),
-        with_escalated_permissions: None,
-        justification: None,
-    };
-
-    let policy = SandboxPolicy::new_read_only_policy();
-
-    let result = process_exec_tool_call(
-        params,
-        SandboxType::None,
-        &policy,
-        cwd.as_path(),
-        &None,
-        Some(stdout_stream),
-    )
-    .await;
-
-    let result = match result {
-        Ok(r) => r,
-        Err(e) => panic!("process_exec_tool_call failed: {e}"),
-    };
-
-    assert_eq!(result.exit_code, 0);
-    assert_eq!(result.stdout.text, "");
-    assert_eq!(result.stderr.text, "oops\n");
-
-    // Collect only stderr delta events
-    let mut err = Vec::new();
-    while let Ok(ev) = rx.try_recv() {
-        if let EventMsg::ExecCommandOutputDelta(ExecCommandOutputDeltaEvent {
-            stream: ExecOutputStream::Stderr,
-            chunk,
-            ..
-        }) = ev.msg
-        {
-            err.extend_from_slice(&chunk);
-        }
-    }
-    assert_eq!(String::from_utf8_lossy(&err), "oops\n");
-}
-
-#[tokio::test]
-async fn test_aggregated_output_interleaves_in_order() {
-    // Spawn a shell that alternates stdout and stderr with sleeps to enforce order.
-    let cmd = vec![
-        "/bin/sh".to_string(),
-        "-c".to_string(),
-        "printf 'O1\\n'; sleep 0.01; printf 'E1\\n' 1>&2; sleep 0.01; printf 'O2\\n'; sleep 0.01; printf 'E2\\n' 1>&2".to_string(),
-    ];
-
-    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
-    let params = ExecParams {
-        command: cmd,
-        cwd: cwd.clone(),
-        timeout_ms: Some(5_000),
-        env: HashMap::new(),
-        with_escalated_permissions: None,
-        justification: None,
-    };
-
-    let policy = SandboxPolicy::new_read_only_policy();
-
-    let result = process_exec_tool_call(
-        params,
-        SandboxType::None,
-        &policy,
-        cwd.as_path(),
-        &None,
-        None,
-    )
-    .await
-    .expect("process_exec_tool_call");
-
-    assert_eq!(result.exit_code, 0);
-    assert_eq!(result.stdout.text, "O1\nO2\n");
-    assert_eq!(result.stderr.text, "E1\nE2\n");
-    assert_eq!(result.aggregated_output.text, "O1\nE1\nO2\nE2\n");
-    assert_eq!(result.aggregated_output.truncated_after_lines, None);
-}
-
-#[tokio::test]
-async fn test_exec_timeout_returns_partial_output() {
-    let cmd = vec![
-        "/bin/sh".to_string(),
-        "-c".to_string(),
-        "printf 'before\\n'; sleep 2; printf 'after\\n'".to_string(),
-    ];
-
-    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
-    let params = ExecParams {
-        command: cmd,
-        cwd: cwd.clone(),
-        timeout_ms: Some(200),
-        env: HashMap::new(),
-        with_escalated_permissions: None,
-        justification: None,
-    };
-
-    let policy = SandboxPolicy::new_read_only_policy();
-
-    let result = process_exec_tool_call(
-        params,
-        SandboxType::None,
-        &policy,
-        cwd.as_path(),
-        &None,
-        None,
-    )
-    .await;
-
-    let Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) = result else {
-        panic!("expected timeout error");
-    };
-
-    assert_eq!(output.exit_code, 124);
-    assert_eq!(output.stdout.text, "before\n");
-    assert!(output.stderr.text.is_empty());
-    assert_eq!(output.aggregated_output.text, "before\n");
-    assert!(output.duration >= Duration::from_millis(200));
-    assert!(output.timed_out);
-}
diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs
index 1a08c6df788..440eb1e893f 100644
--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -9,7 +9,6 @@ mod client;
 mod compact;
 mod compact_resume_fork;
 mod exec;
-mod exec_stream_events;
 mod fork_conversation;
 mod grep_files;
 mod json_result;
diff --git a/codex-rs/core/tests/suite/otel.rs b/codex-rs/core/tests/suite/otel.rs
index 2e7fe317944..63ad3ccaea2 100644
--- a/codex-rs/core/tests/suite/otel.rs
+++ b/codex-rs/core/tests/suite/otel.rs
@@ -815,11 +815,7 @@ async fn handle_container_exec_user_approved_records_tool_decision() {
     mount_sse(
         &server,
         sse(vec![
-            ev_local_shell_call(
-                "user_approved_call",
-                "completed",
-                vec!["/bin/echo", "approved"],
-            ),
+            ev_local_shell_call("user_approved_call", "completed", vec!["/bin/date"]),
             ev_completed("done"),
         ]),
     )
@@ -881,11 +877,7 @@ async fn handle_container_exec_user_approved_for_session_records_tool_decision()
     mount_sse(
         &server,
         sse(vec![
-            ev_local_shell_call(
-                "user_approved_session_call",
-                "completed",
-                vec!["/bin/echo", "persist"],
-            ),
+            ev_local_shell_call("user_approved_session_call", "completed", vec!["/bin/date"]),
             ev_completed("done"),
         ]),
     )
@@ -947,11 +939,7 @@ async fn handle_sandbox_error_user_approves_retry_records_tool_decision() {
     mount_sse(
         &server,
         sse(vec![
-            ev_local_shell_call(
-                "sandbox_retry_call",
-                "completed",
-                vec!["/bin/echo", "retry"],
-            ),
+            ev_local_shell_call("sandbox_retry_call", "completed", vec!["/bin/date"]),
             ev_completed("done"),
         ]),
     )
@@ -1013,7 +1001,7 @@ async fn handle_container_exec_user_denies_records_tool_decision() {
     mount_sse(
         &server,
         sse(vec![
-            ev_local_shell_call("user_denied_call", "completed", vec!["/bin/echo", "deny"]),
+            ev_local_shell_call("user_denied_call", "completed", vec!["/bin/date"]),
             ev_completed("done"),
         ]),
     )
@@ -1075,11 +1063,7 @@ async fn handle_sandbox_error_user_approves_for_session_records_tool_decision()
     mount_sse(
         &server,
         sse(vec![
-            ev_local_shell_call(
-                "sandbox_session_call",
-                "completed",
-                vec!["/bin/echo", "persist"],
-            ),
+            ev_local_shell_call("sandbox_session_call", "completed", vec!["/bin/date"]),
             ev_completed("done"),
         ]),
     )
@@ -1141,7 +1125,7 @@ async fn handle_sandbox_error_user_denies_records_tool_decision() {
     mount_sse(
         &server,
         sse(vec![
-            ev_local_shell_call("sandbox_deny_call", "completed", vec!["/bin/echo", "deny"]),
+            ev_local_shell_call("sandbox_deny_call", "completed", vec!["/bin/date"]),
             ev_completed("done"),
         ]),
     )
diff --git a/codex-rs/linux-sandbox/src/linux_run_main.rs b/codex-rs/linux-sandbox/src/linux_run_main.rs
index ac6ac445e4a..c4b0767fb63 100644
--- a/codex-rs/linux-sandbox/src/linux_run_main.rs
+++ b/codex-rs/linux-sandbox/src/linux_run_main.rs
@@ -8,8 +8,10 @@ use crate::landlock::apply_sandbox_policy_to_current_thread;
 pub struct LandlockCommand {
     /// It is possible that the cwd used in the context of the sandbox policy
     /// is different from the cwd of the process to spawn.
+    #[arg(long = "sandbox-policy-cwd")]
     pub sandbox_policy_cwd: PathBuf,
 
+    #[arg(long = "sandbox-policy")]
     pub sandbox_policy: codex_core::protocol::SandboxPolicy,
 
     /// Full command args to run under landlock.
diff --git a/codex-rs/linux-sandbox/tests/suite/landlock.rs b/codex-rs/linux-sandbox/tests/suite/landlock.rs
index 6af59ef0fa8..a1a3758307b 100644
--- a/codex-rs/linux-sandbox/tests/suite/landlock.rs
+++ b/codex-rs/linux-sandbox/tests/suite/landlock.rs
@@ -44,6 +44,7 @@ async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
         env: create_env_from_core_vars(),
         with_escalated_permissions: None,
         justification: None,
+        arg0: None,
     };
 
     let sandbox_policy = SandboxPolicy::WorkspaceWrite {
@@ -146,6 +147,7 @@ async fn assert_network_blocked(cmd: &[&str]) {
         env: create_env_from_core_vars(),
         with_escalated_permissions: None,
         justification: None,
+        arg0: None,
     };
 
     let sandbox_policy = SandboxPolicy::new_read_only_policy();
diff --git a/codex-rs/mcp-server/tests/suite/codex_tool.rs b/codex-rs/mcp-server/tests/suite/codex_tool.rs
index a26892e5d4e..d7cd200f074 100644
--- a/codex-rs/mcp-server/tests/suite/codex_tool.rs
+++ b/codex-rs/mcp-server/tests/suite/codex_tool.rs
@@ -445,7 +445,7 @@ fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()
 r#"
 model = "mock-model"
 approval_policy = "untrusted"
-sandbox_policy = "read-only"
+sandbox_policy = "workspace-write"
 model_provider = "mock_provider"
diff --git a/codex-rs/utils/pty/Cargo.toml b/codex-rs/utils/pty/Cargo.toml
new file mode 100644
index 00000000000..117d626318b
--- /dev/null
+++ b/codex-rs/utils/pty/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+edition = "2021"
+name = "codex-utils-pty"
+version = { workspace = true }
+
+[lints]
+workspace = true
+
+[dependencies]
+anyhow = { workspace = true }
+portable-pty = { workspace = true }
+tokio = { workspace = true, features = [
+    "macros",
+    "rt-multi-thread",
+    "sync",
+] }
diff --git a/codex-rs/utils/pty/src/lib.rs b/codex-rs/utils/pty/src/lib.rs
new file mode 100644
index 00000000000..740a502ba9b
--- /dev/null
+++ b/codex-rs/utils/pty/src/lib.rs
@@ -0,0 +1,210 @@
+use std::collections::HashMap;
+use std::io::ErrorKind;
+use std::io::Read;
+use std::path::Path;
+use std::sync::atomic::AtomicBool;
+use std::sync::Arc;
+use std::sync::Mutex as StdMutex;
+use std::time::Duration;
+
+use anyhow::Result;
+use portable_pty::native_pty_system;
+use portable_pty::ChildKiller;
+use portable_pty::CommandBuilder;
+use portable_pty::PtySize;
+use tokio::sync::broadcast;
+use tokio::sync::mpsc;
+use tokio::sync::oneshot;
+use tokio::sync::Mutex as TokioMutex;
+use tokio::task::JoinHandle;
+
+#[derive(Debug)]
+pub struct ExecCommandSession {
+    writer_tx: mpsc::Sender<Vec<u8>>,
+    output_tx: broadcast::Sender<Vec<u8>>,
+    killer: StdMutex<Option<Box<dyn ChildKiller + Send + Sync>>>,
+    reader_handle: StdMutex<Option<JoinHandle<()>>>,
+    writer_handle: StdMutex<Option<JoinHandle<()>>>,
+    wait_handle: StdMutex<Option<JoinHandle<()>>>,
+    exit_status: Arc<AtomicBool>,
+    exit_code: Arc<StdMutex<Option<i32>>>,
+}
+
+impl ExecCommandSession {
+    #[allow(clippy::too_many_arguments)]
+    pub fn new(
+        writer_tx: mpsc::Sender<Vec<u8>>,
+        output_tx: broadcast::Sender<Vec<u8>>,
+        killer: Box<dyn ChildKiller + Send + Sync>,
+        reader_handle: JoinHandle<()>,
+        writer_handle: JoinHandle<()>,
+        wait_handle: JoinHandle<()>,
+        exit_status: Arc<AtomicBool>,
+        exit_code: Arc<StdMutex<Option<i32>>>,
+    ) -> (Self, broadcast::Receiver<Vec<u8>>) {
+        let initial_output_rx = output_tx.subscribe();
+        (
+            Self {
+                writer_tx,
+                output_tx,
+                killer: StdMutex::new(Some(killer)),
+                reader_handle: StdMutex::new(Some(reader_handle)),
+                writer_handle: StdMutex::new(Some(writer_handle)),
+                wait_handle: StdMutex::new(Some(wait_handle)),
+                exit_status,
+                exit_code,
+            },
+            initial_output_rx,
+        )
+    }
+
+    pub fn writer_sender(&self) -> mpsc::Sender<Vec<u8>> {
+        self.writer_tx.clone()
+    }
+
+    pub fn output_receiver(&self) -> broadcast::Receiver<Vec<u8>> {
+        self.output_tx.subscribe()
+    }
+
+    pub fn has_exited(&self) -> bool {
+        self.exit_status.load(std::sync::atomic::Ordering::SeqCst)
+    }
+
+    pub fn exit_code(&self) -> Option<i32> {
+        self.exit_code.lock().ok().and_then(|guard| *guard)
+    }
+}
+
+impl Drop for ExecCommandSession {
+    fn drop(&mut self) {
+        if let Ok(mut killer_opt) = self.killer.lock() {
+            if let Some(mut killer) = killer_opt.take() {
+                let _ = killer.kill();
+            }
+        }
+
+        if let Ok(mut h) = self.reader_handle.lock() {
+            if let Some(handle) = h.take() {
+                handle.abort();
+            }
+        }
+        if let Ok(mut h) = self.writer_handle.lock() {
+            if let Some(handle) = h.take() {
+                handle.abort();
+            }
+        }
+        if let Ok(mut h) = self.wait_handle.lock() {
+            if let Some(handle) = h.take() {
+                handle.abort();
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct SpawnedPty {
+    pub session: ExecCommandSession,
+    pub output_rx: broadcast::Receiver<Vec<u8>>,
+    pub exit_rx: oneshot::Receiver<i32>,
+}
+
+pub async fn spawn_pty_process(
+    program: &str,
+    args: &[String],
+    cwd: &Path,
+    env: &HashMap<String, String>,
+) -> Result<SpawnedPty> {
+    if program.is_empty() {
+        anyhow::bail!("missing program for PTY spawn");
+    }
+
+    let pty_system = native_pty_system();
+    let pair = pty_system.openpty(PtySize {
+        rows: 24,
+        cols: 80,
+        pixel_width: 0,
+        pixel_height: 0,
+    })?;
+
+    let mut command_builder = CommandBuilder::new(program);
+    command_builder.cwd(cwd);
+    command_builder.env_clear();
+    for arg in args {
+        command_builder.arg(arg);
+    }
+    for (key, value) in env {
+        command_builder.env(key, value);
+    }
+
+    let mut child = pair.slave.spawn_command(command_builder)?;
+    let killer = child.clone_killer();
+
+    let (writer_tx, mut writer_rx) = mpsc::channel::<Vec<u8>>(128);
+    let (output_tx, _) = broadcast::channel::<Vec<u8>>(256);
+
+    let mut reader = pair.master.try_clone_reader()?;
+    let output_tx_clone = output_tx.clone();
+    let reader_handle: JoinHandle<()> = tokio::task::spawn_blocking(move || {
+        let mut buf = [0u8; 8_192];
+        loop {
+            match reader.read(&mut buf) {
+                Ok(0) => break,
+                Ok(n) => {
+                    let _ = output_tx_clone.send(buf[..n].to_vec());
+                }
+                Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
+                Err(ref e) if e.kind() == ErrorKind::WouldBlock => {
+                    std::thread::sleep(Duration::from_millis(5));
+                    continue;
+                }
+                Err(_) => break,
+            }
+        }
+    });
+
+    let writer = pair.master.take_writer()?;
+    let writer = Arc::new(TokioMutex::new(writer));
+    let writer_handle: JoinHandle<()> = tokio::spawn({
+        let writer = Arc::clone(&writer);
+        async move {
+            while let Some(bytes) = writer_rx.recv().await {
+                let mut guard = writer.lock().await;
+                use std::io::Write;
+                let _ = guard.write_all(&bytes);
+                let _ = guard.flush();
+            }
+        }
+    });
+
+    let (exit_tx, exit_rx) = oneshot::channel::<i32>();
+    let exit_status = Arc::new(AtomicBool::new(false));
+    let wait_exit_status = Arc::clone(&exit_status);
+    let exit_code = Arc::new(StdMutex::new(None));
+    let wait_exit_code = Arc::clone(&exit_code);
+    let wait_handle: JoinHandle<()> = tokio::task::spawn_blocking(move || {
+        let code = match child.wait() {
+            Ok(status) => status.exit_code() as i32,
+            Err(_) => -1,
+        };
+        wait_exit_status.store(true, std::sync::atomic::Ordering::SeqCst);
+        if let Ok(mut guard) = wait_exit_code.lock() {
+            *guard = Some(code);
+        }
+        let _ = exit_tx.send(code);
+    });
+
+    let (session, output_rx) = ExecCommandSession::new(
+        writer_tx,
+        output_tx,
+        killer,
+        reader_handle,
+        writer_handle,
+        wait_handle,
+        exit_status,
+        exit_code,
+    );
+
+    Ok(SpawnedPty {
+        session,
+        output_rx,
+        exit_rx,
+    })
+}
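Taken together, utils/pty exposes a small surface: spawn_pty_process wires the child to a PTY and hands back an ExecCommandSession plus the output and exit channels bundled in SpawnedPty. A minimal standalone sketch of how a caller might drive it, based only on the signatures introduced above (the /bin/echo command, /tmp cwd, and 500 ms drain timeout are illustrative assumptions, not part of the diff):

// Standalone sketch: spawn a short-lived command on a PTY, drain its output,
// then read the exit code from the oneshot channel.
use std::collections::HashMap;
use std::path::Path;
use std::time::Duration;

use codex_utils_pty::spawn_pty_process;
use codex_utils_pty::SpawnedPty;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let args = vec!["codex".to_string()];
    let SpawnedPty {
        session,
        mut output_rx,
        exit_rx,
    } = spawn_pty_process("/bin/echo", &args, Path::new("/tmp"), &HashMap::new()).await?;

    // Drain whatever the child wrote to the PTY; stop once no chunk arrives
    // within the (illustrative) 500 ms window.
    let mut captured = Vec::new();
    while let Ok(Ok(chunk)) =
        tokio::time::timeout(Duration::from_millis(500), output_rx.recv()).await
    {
        captured.extend_from_slice(&chunk);
    }

    let exit_code = exit_rx.await.unwrap_or(-1);
    println!("exit={exit_code} output={}", String::from_utf8_lossy(&captured));

    // Interactive sessions can also take input: writer_sender() returns an
    // mpsc::Sender<Vec<u8>> whose bytes are forwarded to the PTY master.
    let _stdin = session.writer_sender();
    Ok(())
}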