From ded6672ae67b35fedcc6e81fc99f9feedb7e2998 Mon Sep 17 00:00:00 2001 From: Michael Bolin Date: Thu, 25 Sep 2025 12:03:27 -0700 Subject: [PATCH] feat: remove OPENAI_API_KEY from env and mlock if set --- codex-rs/Cargo.lock | 1 + codex-rs/arg0/Cargo.toml | 1 + codex-rs/arg0/src/lib.rs | 50 ++++++++++--- codex-rs/arg0/src/openai_api_key_env_var.rs | 80 +++++++++++++++++++++ codex-rs/cli/src/main.rs | 12 +++- codex-rs/exec/src/main.rs | 7 +- codex-rs/mcp-server/src/main.rs | 7 +- codex-rs/tui/src/main.rs | 7 +- 8 files changed, 151 insertions(+), 14 deletions(-) create mode 100644 codex-rs/arg0/src/openai_api_key_env_var.rs diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 5ad73350c9..41d7527832 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -605,6 +605,7 @@ dependencies = [ "codex-core", "codex-linux-sandbox", "dotenvy", + "libc", "tempfile", "tokio", ] diff --git a/codex-rs/arg0/Cargo.toml b/codex-rs/arg0/Cargo.toml index 10d09e4a4b..5e8dbbafab 100644 --- a/codex-rs/arg0/Cargo.toml +++ b/codex-rs/arg0/Cargo.toml @@ -16,5 +16,6 @@ codex-apply-patch = { workspace = true } codex-core = { workspace = true } codex-linux-sandbox = { workspace = true } dotenvy = { workspace = true } +libc = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true, features = ["rt-multi-thread"] } diff --git a/codex-rs/arg0/src/lib.rs b/codex-rs/arg0/src/lib.rs index e70ff2df64..81f7c712b0 100644 --- a/codex-rs/arg0/src/lib.rs +++ b/codex-rs/arg0/src/lib.rs @@ -7,10 +7,31 @@ use codex_core::CODEX_APPLY_PATCH_ARG1; use std::os::unix::fs::symlink; use tempfile::TempDir; +mod openai_api_key_env_var; +use openai_api_key_env_var::extract_locked_openai_api_key; + const LINUX_SANDBOX_ARG0: &str = "codex-linux-sandbox"; const APPLY_PATCH_ARG0: &str = "apply_patch"; const MISSPELLED_APPLY_PATCH_ARG0: &str = "applypatch"; +/// Arguments supplied to the async entrypoint invoked by [`arg0_dispatch_or_else`]. +/// +/// Currently, these args are not technically computed "pre-main," but they are +/// computed before spawning any threads / the tokio runtime. +#[derive(Debug)] +pub struct PreMainArgs { + pub codex_linux_sandbox_exe: Option, + + /// Value of the `OPENAI_API_KEY` environment variable that was set at + /// startup, if any. + /// + /// If `Some`, the key has already been removed from the environment and an + /// attempt was made to `mlock(2)` the string's allocation to keep it + /// resident in memory. The reference is leaked for the lifetime of the + /// process so it can be safely shared across threads without copying. + pub openai_api_key: Option<&'static str>, +} + /// While we want to deploy the Codex CLI as a single executable for simplicity, /// we also want to expose some of its functionality as distinct CLIs, so we use /// the "arg0 trick" to determine which CLI to dispatch. This effectively allows @@ -22,19 +43,20 @@ const MISSPELLED_APPLY_PATCH_ARG0: &str = "applypatch"; /// [`codex_linux_sandbox::run_main`] (which never returns). Otherwise we: /// /// 1. Load `.env` values from `~/.codex/.env` before creating any threads. -/// 2. Construct a Tokio multi-thread runtime. +/// 2. Depending on `extract_openai_api_key`, extract and lock the +/// `OPENAI_API_KEY` environment variable, if present. /// 3. Derive the path to the current executable (so children can re-invoke the /// sandbox) when running on Linux. -/// 4. Execute the provided async `main_fn` inside that runtime, forwarding any -/// error. Note that `main_fn` receives `codex_linux_sandbox_exe: -/// Option`, as an argument, which is generally needed as part of -/// constructing [`codex_core::config::Config`]. +/// 4. Construct a Tokio multi-thread runtime. +/// 5. Execute the provided async `main_fn` inside that runtime, forwarding any +/// error. The closure receives [`PreMainArgs`], which includes the optional +/// path to the linux sandbox helper as well as the OpenAI API key. /// /// This function should be used to wrap any `main()` function in binary crates /// in this workspace that depends on these helper CLIs. -pub fn arg0_dispatch_or_else(main_fn: F) -> anyhow::Result<()> +pub fn arg0_dispatch_or_else(extract_openai_api_key: bool, main_fn: F) -> anyhow::Result<()> where - F: FnOnce(Option) -> Fut, + F: FnOnce(PreMainArgs) -> Fut, Fut: Future>, { // Determine if we were invoked via the special alias. @@ -76,6 +98,13 @@ where // before creating any threads/the Tokio runtime. load_dotenv(); + // Perform the OPENAI_API_KEY check after loading the .env file. + let openai_api_key = if extract_openai_api_key { + extract_locked_openai_api_key() + } else { + None + }; + // Retain the TempDir so it exists for the lifetime of the invocation of // this executable. Admittedly, we could invoke `keep()` on it, but it // would be nice to avoid leaving temporary directories behind, if possible. @@ -99,7 +128,12 @@ where None }; - main_fn(codex_linux_sandbox_exe).await + let args = PreMainArgs { + codex_linux_sandbox_exe, + openai_api_key, + }; + + main_fn(args).await }) } diff --git a/codex-rs/arg0/src/openai_api_key_env_var.rs b/codex-rs/arg0/src/openai_api_key_env_var.rs new file mode 100644 index 0000000000..a0e99c78ce --- /dev/null +++ b/codex-rs/arg0/src/openai_api_key_env_var.rs @@ -0,0 +1,80 @@ +const OPENAI_API_KEY_ENV_VAR: &str = "OPENAI_API_KEY"; + +pub(crate) fn extract_locked_openai_api_key() -> Option<&'static str> { + match std::env::var(OPENAI_API_KEY_ENV_VAR) { + Ok(key) => { + if key.is_empty() { + return None; + } + + // Safety: modifying environment variables is only done before new + // threads are spawned. + clear_api_key_env_var(); + + // into_boxed_str() may reallocate, so only lock the memory after + // the final allocation is known. + let leaked: &'static mut str = Box::leak(key.into_boxed_str()); + mlock_str(leaked); + Some(leaked) + } + Err(std::env::VarError::NotPresent) => None, + Err(std::env::VarError::NotUnicode(_)) => { + // Cannot possibly be a valid API key, but we will clear it anyway. + clear_api_key_env_var(); + + None + } + } +} + +/// Note this does not guarantee that the memory is wiped, only that the +/// environment variable is removed from this process's environment. +fn clear_api_key_env_var() { + unsafe { + std::env::remove_var(OPENAI_API_KEY_ENV_VAR); + } +} + +#[cfg(unix)] +fn mlock_str(value: &str) { + use libc::_SC_PAGESIZE; + use libc::c_void; + use libc::mlock; + use libc::sysconf; + + if value.is_empty() { + return; + } + + // Safety: we only read the pointer and length for mlock bookkeeping. + let page_size = unsafe { sysconf(_SC_PAGESIZE) }; + if page_size <= 0 { + return; + } + let page_size = page_size as usize; + if page_size == 0 { + return; + } + + let addr = value.as_ptr() as usize; + let len = value.len(); + let start = addr & !(page_size - 1); + let addr_end = match addr.checked_add(len) { + Some(v) => match v.checked_add(page_size - 1) { + Some(total) => total, + None => return, + }, + None => return, + }; + let end = addr_end & !(page_size - 1); + let size = end.saturating_sub(start); + if size == 0 { + return; + } + + // Best-effort; ignore failures because mlock may require privileges. + let _ = unsafe { mlock(start as *const c_void, size) }; +} + +#[cfg(not(unix))] +fn mlock_str(_value: &str) {} diff --git a/codex-rs/cli/src/main.rs b/codex-rs/cli/src/main.rs index b1e9601c8c..71dbbaaccc 100644 --- a/codex-rs/cli/src/main.rs +++ b/codex-rs/cli/src/main.rs @@ -2,6 +2,7 @@ use clap::CommandFactory; use clap::Parser; use clap_complete::Shell; use clap_complete::generate; +use codex_arg0::PreMainArgs; use codex_arg0::arg0_dispatch_or_else; use codex_chatgpt::apply_command::ApplyCommand; use codex_chatgpt::apply_command::run_apply_command; @@ -224,13 +225,18 @@ fn pre_main_hardening() { } fn main() -> anyhow::Result<()> { - arg0_dispatch_or_else(|codex_linux_sandbox_exe| async move { - cli_main(codex_linux_sandbox_exe).await?; + arg0_dispatch_or_else(true, |pre_main_args| async move { + cli_main(pre_main_args).await?; Ok(()) }) } -async fn cli_main(codex_linux_sandbox_exe: Option) -> anyhow::Result<()> { +async fn cli_main(pre_main_args: PreMainArgs) -> anyhow::Result<()> { + let PreMainArgs { + codex_linux_sandbox_exe, + openai_api_key: _, + } = pre_main_args; + let MultitoolCli { config_overrides: root_config_overrides, mut interactive, diff --git a/codex-rs/exec/src/main.rs b/codex-rs/exec/src/main.rs index 03ee533ea9..fb902d88ca 100644 --- a/codex-rs/exec/src/main.rs +++ b/codex-rs/exec/src/main.rs @@ -10,6 +10,7 @@ //! This allows us to ship a completely separate set of functionality as part //! of the `codex-exec` binary. use clap::Parser; +use codex_arg0::PreMainArgs; use codex_arg0::arg0_dispatch_or_else; use codex_common::CliConfigOverrides; use codex_exec::Cli; @@ -25,7 +26,11 @@ struct TopCli { } fn main() -> anyhow::Result<()> { - arg0_dispatch_or_else(|codex_linux_sandbox_exe| async move { + arg0_dispatch_or_else(false, |pre_main_args| async move { + let PreMainArgs { + codex_linux_sandbox_exe, + openai_api_key: _, + } = pre_main_args; let top_cli = TopCli::parse(); // Merge root-level overrides into inner CLI struct so downstream logic remains unchanged. let mut inner = top_cli.inner; diff --git a/codex-rs/mcp-server/src/main.rs b/codex-rs/mcp-server/src/main.rs index 314944fab5..773f69ad52 100644 --- a/codex-rs/mcp-server/src/main.rs +++ b/codex-rs/mcp-server/src/main.rs @@ -1,9 +1,14 @@ +use codex_arg0::PreMainArgs; use codex_arg0::arg0_dispatch_or_else; use codex_common::CliConfigOverrides; use codex_mcp_server::run_main; fn main() -> anyhow::Result<()> { - arg0_dispatch_or_else(|codex_linux_sandbox_exe| async move { + arg0_dispatch_or_else(false, |pre_main_args| async move { + let PreMainArgs { + codex_linux_sandbox_exe, + openai_api_key: _, + } = pre_main_args; run_main(codex_linux_sandbox_exe, CliConfigOverrides::default()).await?; Ok(()) }) diff --git a/codex-rs/tui/src/main.rs b/codex-rs/tui/src/main.rs index 50ea95f170..966b9a8743 100644 --- a/codex-rs/tui/src/main.rs +++ b/codex-rs/tui/src/main.rs @@ -1,4 +1,5 @@ use clap::Parser; +use codex_arg0::PreMainArgs; use codex_arg0::arg0_dispatch_or_else; use codex_common::CliConfigOverrides; use codex_tui::Cli; @@ -14,7 +15,11 @@ struct TopCli { } fn main() -> anyhow::Result<()> { - arg0_dispatch_or_else(|codex_linux_sandbox_exe| async move { + arg0_dispatch_or_else(false, |pre_main_args| async move { + let PreMainArgs { + codex_linux_sandbox_exe, + openai_api_key: _, + } = pre_main_args; let top_cli = TopCli::parse(); let mut inner = top_cli.inner; inner