diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 103e8cf6bd..bdfe53a90e 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -78,8 +78,7 @@ jobs: - uses: dtolnay/rust-toolchain@1.90 - uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2 with: - tool: cargo-shear - version: 1.5.1 + tool: cargo-shear@1.5.1 - name: cargo shear run: cargo shear @@ -153,6 +152,21 @@ jobs: targets: ${{ matrix.target }} components: clippy + - name: Normalize Git EOL behavior (Windows) + if: runner.os == 'Windows' + shell: bash + run: | + set -euo pipefail + git config --global core.autocrlf false + git config --global core.eol lf + { + echo "### Git config on runner — ${RUNNER_OS} (lint_build)"; + echo; + echo '```'; + git config --list --show-origin | sort || true; + echo '```'; + } >> "$GITHUB_STEP_SUMMARY" + # Explicit cache restore: split cargo home vs target, so we can # avoid caching the large target dir on the gnu-dev job. 
- name: Restore cargo home cache @@ -172,8 +186,7 @@ jobs: - name: Install sccache uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2 with: - tool: sccache - version: 0.7.5 + tool: sccache@0.7.5 - name: Configure sccache backend shell: bash @@ -230,8 +243,7 @@ jobs: if: ${{ matrix.profile == 'release' }} uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2 with: - tool: cargo-chef - version: 0.1.71 + tool: cargo-chef@0.1.71 - name: Pre-warm dependency cache (cargo-chef) if: ${{ matrix.profile == 'release' }} @@ -372,8 +384,7 @@ jobs: - name: Install sccache uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2 with: - tool: sccache - version: 0.7.5 + tool: sccache@0.7.5 - name: Configure sccache backend shell: bash @@ -401,8 +412,7 @@ jobs: - uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2 with: - tool: nextest - version: 0.9.103 + tool: nextest@0.9.103 - name: tests id: test diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index e8ce0bfe6a..97bf2f53a4 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -891,6 +891,8 @@ dependencies = [ "anyhow", "assert_cmd", "assert_matches", + "clap", + "pathdiff", "pretty_assertions", "similar", "tempfile", diff --git a/codex-rs/apply-patch/Cargo.toml b/codex-rs/apply-patch/Cargo.toml index a239cd6310..68386c500f 100644 --- a/codex-rs/apply-patch/Cargo.toml +++ b/codex-rs/apply-patch/Cargo.toml @@ -1,28 +1,36 @@ -[package] -edition = "2024" -name = "codex-apply-patch" -version = { workspace = true } - -[lib] -name = "codex_apply_patch" -path = "src/lib.rs" - -[[bin]] -name = "apply_patch" -path = "src/main.rs" - -[lints] -workspace = true - +[package] +edition = "2024" +name = "codex-apply-patch" +version = { workspace = true } + +[lib] +name = "codex_apply_patch" +path = "src/lib.rs" + +[[bin]] +name = "apply_patch" +path = "src/main.rs" + +[lints] +workspace = true + [dependencies] anyhow = { workspace = true } 
similar = { workspace = true } thiserror = { workspace = true } tree-sitter = { workspace = true } tree-sitter-bash = { workspace = true } - -[dev-dependencies] -assert_cmd = { workspace = true } -assert_matches = { workspace = true } -pretty_assertions = { workspace = true } -tempfile = { workspace = true } +clap = { version = "4", features = ["derive"] } +pathdiff = "0.2" + +[dev-dependencies] +assert_cmd = { workspace = true } +assert_matches = { workspace = true } +pretty_assertions = { workspace = true } +tempfile = { workspace = true } + +[features] +default = ["eol-cache"] +# Compile-time gate for EOL caching. When disabled, the code falls back to +# raw git lookups with identical behavior. +eol-cache = [] diff --git a/codex-rs/apply-patch/src/eol.rs b/codex-rs/apply-patch/src/eol.rs new file mode 100644 index 0000000000..4a5bfb7236 --- /dev/null +++ b/codex-rs/apply-patch/src/eol.rs @@ -0,0 +1,713 @@ +//! EOL selection and normalization for apply-patch writes. +//! +//! Precedence used when choosing EOLs for writes: +//! - CLI/env override wins (lf|crlf|git|detect) +//! - .gitattributes (path-specific) → lf/crlf/native; binary or -text => Unknown (skip) +//! - For new files only: if no attribute matches, default to LF (not OS/native) +//! - Detect from content (existing files only; callers sniff original bytes) +//! +//! Notes: +//! - Existing files: when no CLI/env override, callers should infer from the bytes +//! already in memory (original buffer or nearby hunk context). Do not re-read the file. +//! - Normalization only happens on final disk writes; previews/summaries may remain LF. +//! - Trailing newline presence is preserved exactly; we do not add or remove it. 
/// Line-ending classification for a buffer or file.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum Eol {
    /// Unix-style `\n` endings.
    Lf,
    /// Windows-style `\r\n` endings.
    Crlf,
    /// No decision possible (no newlines seen, binary, or `-text`).
    Unknown,
}

/// CLI/env override for EOL selection (`APPLY_PATCH_ASSUME_EOL`).
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum AssumeEol {
    /// No override supplied; fall through to repo policy / detection.
    Unspecified,
    /// Consult git (.gitattributes) for the answer.
    Git,
    /// Detect from the bytes being written.
    Detect,
    /// Force `\n`.
    Lf,
    /// Force `\r\n`.
    Crlf,
}

/// Process-wide override, seeded once from the environment.
static ASSUME_EOL: LazyLock<std::sync::Mutex<AssumeEol>> =
    LazyLock::new(|| std::sync::Mutex::new(assume_eol_from_env()));

/// Read the initial override from `APPLY_PATCH_ASSUME_EOL`; a missing or
/// unparseable value means "no override".
fn assume_eol_from_env() -> AssumeEol {
    std::env::var("APPLY_PATCH_ASSUME_EOL")
        .ok()
        .and_then(|v| parse_assume_eol(&v))
        .unwrap_or(AssumeEol::Unspecified)
}

/// Replace the process-wide override (used by the CLI after arg parsing).
/// A poisoned lock is silently ignored.
pub fn set_assume_eol(a: AssumeEol) {
    if let Ok(mut guard) = ASSUME_EOL.lock() {
        *guard = a;
    }
}

/// Current process-wide override; a poisoned lock degrades to `Unspecified`.
pub fn get_assume_eol() -> AssumeEol {
    ASSUME_EOL
        .lock()
        .map(|g| *g)
        .unwrap_or(AssumeEol::Unspecified)
}

/// Parse a user-supplied override value (trimmed, case-insensitive).
/// Returns `None` for anything other than lf|crlf|git|detect.
pub fn parse_assume_eol(s: &str) -> Option<AssumeEol> {
    match s.trim().to_ascii_lowercase().as_str() {
        "lf" => Some(AssumeEol::Lf),
        "crlf" => Some(AssumeEol::Crlf),
        "git" => Some(AssumeEol::Git),
        "detect" => Some(AssumeEol::Detect),
        _ => None,
    }
}

/// The platform's conventional line ending.
pub fn os_native_eol() -> Eol {
    if cfg!(windows) { Eol::Crlf } else { Eol::Lf }
}

/// Detect the dominant EOL of `buf` by counting CRLF vs lone-LF newlines,
/// so mixed files get a majority answer. Ties favor CRLF; no newlines at
/// all yields `Unknown`.
pub fn detect_eol_from_bytes(buf: &[u8]) -> Eol {
    // usize counters: the previous i32 counters could overflow on inputs
    // with more than 2^31 newlines.
    let mut crlf = 0usize;
    let mut lf = 0usize;
    for (i, &b) in buf.iter().enumerate() {
        if b == b'\n' {
            if i > 0 && buf[i - 1] == b'\r' {
                crlf += 1;
            } else {
                lf += 1;
            }
        }
    }
    if crlf == 0 && lf == 0 {
        Eol::Unknown
    } else if crlf >= lf {
        Eol::Crlf
    } else {
        Eol::Lf
    }
}
+pub fn normalize_to_eol_preserve_eof(mut s: String, target: Eol) -> String { + let had_trailing_nl = s.as_bytes().last().map(|b| *b == b'\n').unwrap_or(false); + let eol_str = match target { + Eol::Crlf => "\r\n", + Eol::Lf | Eol::Unknown => "\n", + }; + s = s.replace("\r\n", "\n"); + if matches!(target, Eol::Crlf) { + s = s.replace('\n', "\r\n"); + } + let ends_with_target = s.ends_with(eol_str); + match (had_trailing_nl, ends_with_target) { + (true, false) => s.push_str(eol_str), + (false, true) => { + let new_len = s.len().saturating_sub(eol_str.len()); + s.truncate(new_len); + } + _ => {} + } + s +} + +pub fn git_core_eol(repo_root: &Path) -> Option { + #[cfg(all(test, feature = "eol-cache"))] + RAW_CORE_EOL.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + let mut cmd = std::process::Command::new("git"); + cmd.arg("-C") + .arg(repo_root) + .arg("config") + .arg("--local") + .arg("--get") + .arg("core.eol"); + cmd.current_dir(repo_root); + cmd.env("GIT_CONFIG_NOSYSTEM", "1") + .env("GIT_CONFIG_GLOBAL", "/dev/null"); + let out = cmd.output().ok()?; + #[cfg(test)] + { + let status = out.status; + eprintln!("git core.eol status: {status:?}"); + } + if !out.status.success() { + return None; + } + let val = String::from_utf8_lossy(&out.stdout) + .trim() + .to_ascii_lowercase(); + match val.as_str() { + "lf" => Some(Eol::Lf), + "crlf" => Some(Eol::Crlf), + "native" => Some(os_native_eol()), + _ => None, + } +} + +// Helper for core.autocrlf was used in production previously; tests rely on +// core.eol coverage now, so this is intentionally omitted. + +pub fn git_check_attr_eol(repo_root: &Path, rel_path: &Path) -> Option { + #[cfg(all(test, feature = "eol-cache"))] + RAW_ATTR.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + // Normalize to repo-relative path with forward slashes, regardless of OS. + // This avoids git check-attr mismatches on macOS/Linux when absolute paths + // or OS-native separators are used. 
+ let rel_key = rel_attr_key(repo_root, rel_path); + let mut cmd = std::process::Command::new("git"); + cmd.arg("-C") + .arg(repo_root) + // Use /dev/null on all platforms to avoid Git for Windows NUL quirks + .arg("-c") + .arg("core.attributesfile=/dev/null") + .arg("check-attr") + .arg("-z") + .arg("eol") + .arg("--") + .arg(&rel_key); + // Ensure attribute resolution is repo-local only (no system/global leakage) + cmd.current_dir(repo_root); + cmd.env("GIT_ATTR_NOSYSTEM", "1") + .env("GIT_CONFIG_NOSYSTEM", "1") + .env("GIT_CONFIG_GLOBAL", "/dev/null") + .env("LANG", "C") + .env("LC_ALL", "C"); + let out = cmd.output().ok()?; + if !out.status.success() { + return None; + } + // Parse null-separated: "\0eol\0\0" + let z = String::from_utf8_lossy(&out.stdout); + #[cfg(test)] + { + let s = z.trim_end_matches('\0'); + eprintln!("git check-attr eol stdout: {s:?}"); + } + let mut parts = z.split('\0'); + let _p = parts.next(); + let _k = parts.next(); + if let Some(v) = parts.next() { + let v = v.trim().to_ascii_lowercase(); + match v.as_str() { + "lf" => Some(Eol::Lf), + "crlf" => Some(Eol::Crlf), + _ => None, + } + } else { + None + } +} + +/// Decide EOL based on repo policy and CLI/env. 
+/// - For existing files (is_new_file=false): +/// - If CLI override is Lf/Crlf => return it +/// - If CLI override is Git => consult Git and return if specified; Unknown for binary/-text +/// - Otherwise return Unknown so caller can detect from original bytes they already hold +/// - For new files (is_new_file=true): +/// - CLI override Lf/Crlf wins +/// - CLI override Git => consult Git +/// - Otherwise consult .gitattributes → core.eol → core.autocrlf +/// - Fall back to OS native; detection from patch bytes should be handled by caller +// +// Caching layer +#[cfg(feature = "eol-cache")] +type AttrKey = (PathBuf, String); +#[cfg(all(test, feature = "eol-cache"))] +static CORE_EOL_CACHE: LazyLock>>> = + LazyLock::new(|| std::sync::Mutex::new(HashMap::new())); +#[cfg(feature = "eol-cache")] +static ATTRS_CACHE: LazyLock>>> = + LazyLock::new(|| std::sync::Mutex::new(HashMap::new())); + +#[cfg(feature = "eol-cache")] +fn canonical_repo_root(p: &Path) -> PathBuf { + std::fs::canonicalize(p).unwrap_or_else(|_| p.to_path_buf()) +} + +pub(crate) fn norm_rel_key(rel: &Path) -> String { + let s = rel.to_string_lossy().replace('\\', "/"); + if cfg!(windows) { + s.to_ascii_lowercase() + } else { + s + } +} + +#[cfg(all(test, feature = "eol-cache"))] +fn git_core_eol_cached(repo_root: &Path) -> Option { + let key = canonical_repo_root(repo_root); + if let Ok(mut m) = CORE_EOL_CACHE.lock() { + if let Some(v) = m.get(&key) { + return *v; + } + let v = git_core_eol(&key); + m.insert(key, v); + return v; + } + git_core_eol(repo_root) +} +// Only used by tests; no non-test variant needed. + +// Note: git_core_autocrlf_* no longer used in production code; omitted outside tests. 
+ +#[cfg(feature = "eol-cache")] +pub(crate) fn git_check_attr_eol_cached(repo_root: &Path, rel_path: &Path) -> Option { + let rkey = canonical_repo_root(repo_root); + let pkey = norm_rel_key(rel_path); + if let Ok(mut m) = ATTRS_CACHE.lock() { + if let Some(v) = m.get(&(rkey.clone(), pkey.clone())) { + return *v; + } + let v = git_check_attr_eol(&rkey, Path::new(&pkey)); + m.insert((rkey, pkey), v); + return v; + } + git_check_attr_eol(repo_root, rel_path) +} +#[cfg(not(feature = "eol-cache"))] +pub(crate) fn git_check_attr_eol_cached(repo_root: &Path, rel_path: &Path) -> Option { + git_check_attr_eol(repo_root, rel_path) +} + +#[cfg(feature = "eol-cache")] +pub fn notify_gitattributes_touched(repo_root: &Path) { + let key = canonical_repo_root(repo_root); + if let Ok(mut m) = ATTRS_CACHE.lock() { + m.retain(|(root, _), _| root != &key); + } +} +#[cfg(not(feature = "eol-cache"))] +pub fn notify_gitattributes_touched(_repo_root: &Path) {} + +pub fn decide_eol(repo_root: Option<&Path>, rel_path: Option<&Path>, is_new_file: bool) -> Eol { + match get_assume_eol() { + AssumeEol::Lf => return Eol::Lf, + AssumeEol::Crlf => return Eol::Crlf, + AssumeEol::Git => { + // Respect only path-specific attributes. Avoid global git core.* + // settings to keep behavior deterministic across runners. + if let (Some(root), Some(rel)) = (repo_root, rel_path) + && let Some(e) = git_check_attr_eol_cached(root, rel) + { + return e; + } + // No attribute match: default to LF for new files, Unknown for existing. + return if is_new_file { Eol::Lf } else { Eol::Unknown }; + } + AssumeEol::Detect | AssumeEol::Unspecified => {} + } + + if !is_new_file { + // Existing: let caller decide from original bytes + return Eol::Unknown; + } + // New file without explicit CLI override: use the same chain as choose_eol_for_new_file. 
+ if let Some(root) = repo_root { + let rel = rel_path.unwrap_or_else(|| Path::new(".")); + return choose_eol_for_new_file(root, rel, &OsEnv); + } + Eol::Lf +} + +/// Lightweight environment accessor used to control overrides in tests. +pub trait Env { + fn get(&self, key: &str) -> Option; +} + +/// Default OS environment implementation. +pub struct OsEnv; +impl Env for OsEnv { + fn get(&self, key: &str) -> Option { + std::env::var(key).ok() + } +} + +fn git_core_autocrlf(repo_root: &Path) -> Option { + let mut cmd = std::process::Command::new("git"); + cmd.arg("-C") + .arg(repo_root) + .arg("config") + .arg("--local") + .arg("--get") + .arg("core.autocrlf"); + cmd.current_dir(repo_root); + cmd.env("GIT_CONFIG_NOSYSTEM", "1") + .env("GIT_CONFIG_GLOBAL", "/dev/null"); + let out = cmd.output().ok()?; + #[cfg(test)] + { + let status = out.status; + eprintln!("git core.autocrlf status: {status:?}"); + } + if !out.status.success() { + return None; + } + let val = String::from_utf8_lossy(&out.stdout) + .trim() + .to_ascii_lowercase(); + #[cfg(test)] + eprintln!("git core.autocrlf: {val:?}"); + match val.as_str() { + "true" => Some(Eol::Crlf), + "input" => Some(Eol::Lf), + _ => None, + } +} + +/// Choose the EOL for a new file using deterministic precedence: +/// explicit override (lf/crlf) > .gitattributes > core.eol > core.autocrlf > default LF. +/// If the override is `git` or `detect`, fall back to repo policy (or LF if none). +pub fn choose_eol_for_new_file(repo_root: &Path, rel_path: &Path, env: &dyn Env) -> Eol { + let from_git = git_check_attr_eol_cached(repo_root, rel_path) + .or_else(|| git_core_eol(repo_root)) + .or_else(|| git_core_autocrlf(repo_root)); + + if let Some(v) = env.get("APPLY_PATCH_ASSUME_EOL") + && let Some(sel) = parse_assume_eol(&v) + { + return match sel { + AssumeEol::Lf => Eol::Lf, + AssumeEol::Crlf => Eol::Crlf, + // For Git or Detect, fall back to repo policy if available, else LF. 
+ _ => from_git.unwrap_or(Eol::Lf), + }; + } + from_git.unwrap_or(Eol::Lf) +} + +/// Compute a repo-relative attribute lookup key using forward slashes. +/// Falls back to the provided path's display string if a relative path +/// cannot be determined (should be rare). +fn rel_attr_key(repo_root: &Path, path: &Path) -> String { + // On macOS and Windows, repo_root and path may not share the same + // canonical representation (e.g., /var vs /private/var, case + // differences, or symlinks). Canonicalize both sides before + // computing the relative key that we pass to `git check-attr` so + // .gitattributes matching is reliable for new files that do not + // yet exist on disk. + let canon_root = std::fs::canonicalize(repo_root).unwrap_or_else(|_| repo_root.to_path_buf()); + + // If the file doesn't exist yet (typical for Add File), try to + // canonicalize its parent directory and then join the file name so + // we still get a path under the canonicalized repo root. + let canon_path = match std::fs::canonicalize(path) { + Ok(p) => p, + Err(_) => { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + match std::fs::canonicalize(parent) { + Ok(cp) => cp.join(path.file_name().unwrap_or_default()), + Err(_) => path.to_path_buf(), + } + } + }; + + let rel: std::path::PathBuf = + diff_paths(&canon_path, &canon_root).unwrap_or_else(|| path.to_path_buf()); + let s = rel.to_string_lossy().replace('\\', "/"); + s.trim_start_matches('/').to_string() +} + +/// Parse stdout from `git check-attr eol -- ` and return EOL if +/// explicitly specified. Treat `auto`/`unspecified` as None. 
+#[allow(dead_code)] +fn parse_git_check_attr_eol_stdout(stdout: &str) -> Option { + for line in stdout.lines() { + // Expected: ": eol: " + if let Some((_, rhs)) = line.split_once(": eol: ") { + let v = rhs.trim().to_ascii_lowercase(); + return match v.as_str() { + "lf" => Some(Eol::Lf), + "crlf" => Some(Eol::Crlf), + "native" => Some(os_native_eol()), + // 'auto' or 'unspecified' or others => no decision + _ => None, + }; + } + } + None +} + +// Test instrumentation and unit tests for caching +#[cfg(all(test, feature = "eol-cache"))] +static RAW_CORE_EOL: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0); +#[cfg(all(test, feature = "eol-cache"))] +static RAW_AUTOCRLF: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0); +#[cfg(all(test, feature = "eol-cache"))] +static RAW_ATTR: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0); + +#[cfg(all(test, feature = "eol-cache"))] +pub fn reset_git_counters() { + RAW_CORE_EOL.store(0, std::sync::atomic::Ordering::Relaxed); + RAW_AUTOCRLF.store(0, std::sync::atomic::Ordering::Relaxed); + RAW_ATTR.store(0, std::sync::atomic::Ordering::Relaxed); +} + +#[cfg(all(test, feature = "eol-cache"))] +pub fn raw_counts() -> (usize, usize, usize) { + ( + RAW_CORE_EOL.load(std::sync::atomic::Ordering::Relaxed), + RAW_AUTOCRLF.load(std::sync::atomic::Ordering::Relaxed), + RAW_ATTR.load(std::sync::atomic::Ordering::Relaxed), + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Mutex; + #[cfg(feature = "eol-cache")] + use tempfile::tempdir; + + // Serialize tests that touch global RAW_* counters to avoid flakiness + // when tests run in parallel. 
+ static TEST_MUTEX: LazyLock> = LazyLock::new(|| Mutex::new(())); + + #[cfg(feature = "eol-cache")] + #[test] + fn test_core_eol_cached_only_runs_git_once() { + let _g = TEST_MUTEX.lock().unwrap(); + reset_git_counters(); + let dir = tempdir().unwrap(); + std::process::Command::new("git") + .arg("init") + .arg("-q") + .current_dir(dir.path()) + .status() + .unwrap(); + std::process::Command::new("git") + .args([ + "-C", + dir.path().to_str().unwrap(), + "config", + "core.eol", + "lf", + ]) + .status() + .unwrap(); + assert_eq!(git_core_eol_cached(dir.path()), Some(Eol::Lf)); + assert_eq!(git_core_eol_cached(dir.path()), Some(Eol::Lf)); + let (core, _, _) = raw_counts(); + assert_eq!(core, 1); + } + + #[cfg(feature = "eol-cache")] + #[test] + fn test_attrs_cache_and_invalidate() { + let _g = TEST_MUTEX.lock().unwrap(); + reset_git_counters(); + let dir = tempdir().unwrap(); + std::process::Command::new("git") + .arg("init") + .arg("-q") + .current_dir(dir.path()) + .status() + .unwrap(); + std::fs::write(dir.path().join(".gitattributes"), "*.txt text eol=crlf\n").unwrap(); + // First call populates cache + let rel = Path::new("foo.txt"); + assert_eq!(git_check_attr_eol_cached(dir.path(), rel), Some(Eol::Crlf)); + // Second call hits cache + assert_eq!(git_check_attr_eol_cached(dir.path(), rel), Some(Eol::Crlf)); + let (_, _, attr1) = raw_counts(); + assert_eq!(attr1, 1); + + // Change gitattributes and notify + std::fs::write(dir.path().join(".gitattributes"), "*.txt text eol=lf\n").unwrap(); + notify_gitattributes_touched(dir.path()); + + // Next call re-runs git and reflects new mapping + assert_eq!(git_check_attr_eol_cached(dir.path(), rel), Some(Eol::Lf)); + let (_, _, attr2) = raw_counts(); + assert_eq!(attr2, 2); + } + + #[test] + fn test_windows_rel_key_normalization() { + let a = norm_rel_key(Path::new("A\\B.txt")); + let b = norm_rel_key(Path::new("a/b.txt")); + if cfg!(windows) { + assert_eq!(a, b); + } else { + assert_ne!(a, b); + } + } + + #[test] + fn 
test_gitattributes_eol_crlf_cross_platform() { + let _g = TEST_MUTEX.lock().unwrap(); + // Ensure that git_check_attr_eol correctly detects CRLF via .gitattributes + // on all platforms, regardless of absolute vs. relative path inputs. + let dir = tempdir().unwrap(); + std::process::Command::new("git") + .arg("init") + .arg("-q") + .current_dir(dir.path()) + .status() + .unwrap(); + std::fs::write(dir.path().join(".gitattributes"), "*.txt text eol=crlf\n").unwrap(); + let file = dir.path().join("foo.txt"); + std::fs::write(&file, "line1\n").unwrap(); + // Pass absolute path intentionally; helper should normalize. + let result = git_check_attr_eol(dir.path(), &file); + assert_eq!(result, Some(Eol::Crlf)); + } + + #[test] + fn test_gitattributes_nested_path_crlf() { + let _g = TEST_MUTEX.lock().unwrap(); + let dir = tempdir().unwrap(); + std::process::Command::new("git") + .arg("init") + .arg("-q") + .current_dir(dir.path()) + .status() + .unwrap(); + // Apply CRLF to everything under nested/ + std::fs::write( + dir.path().join(".gitattributes"), + "nested/** text eol=crlf\n", + ) + .unwrap(); + + let nested = dir.path().join("nested").join("sub").join("foo.txt"); + std::fs::create_dir_all(nested.parent().unwrap()).unwrap(); + std::fs::write(&nested, "line1\n").unwrap(); + let result = git_check_attr_eol(dir.path(), &nested); + assert_eq!(result, Some(Eol::Crlf)); + } + + #[test] + fn test_choose_eol_gitattributes_new_file_nonexistent() { + let _g = TEST_MUTEX.lock().unwrap(); + let dir = tempdir().unwrap(); + std::process::Command::new("git") + .arg("init") + .arg("-q") + .current_dir(dir.path()) + .status() + .unwrap(); + std::fs::write(dir.path().join(".gitattributes"), "*.txt text eol=crlf\n").unwrap(); + struct NoEnv; + impl Env for NoEnv { + fn get(&self, _k: &str) -> Option { + None + } + } + let e = choose_eol_for_new_file(dir.path(), Path::new("foo.txt"), &NoEnv); + assert_eq!(e, Eol::Crlf); + } + + #[test] + fn test_choose_eol_core_eol_crlf_no_attrs() { + 
let _g = TEST_MUTEX.lock().unwrap(); + let dir = tempdir().unwrap(); + std::process::Command::new("git") + .arg("init") + .arg("-q") + .current_dir(dir.path()) + .status() + .unwrap(); + std::process::Command::new("git") + .args([ + "-C", + dir.path().to_str().unwrap(), + "config", + "core.eol", + "crlf", + ]) + .status() + .unwrap(); + struct NoEnv; + impl Env for NoEnv { + fn get(&self, _k: &str) -> Option { + None + } + } + let e = choose_eol_for_new_file(dir.path(), Path::new("foo.txt"), &NoEnv); + assert_eq!(e, Eol::Crlf); + } + + #[test] + fn test_choose_eol_core_autocrlf_true_no_attrs() { + let _g = TEST_MUTEX.lock().unwrap(); + let dir = tempdir().unwrap(); + std::process::Command::new("git") + .arg("init") + .arg("-q") + .current_dir(dir.path()) + .status() + .unwrap(); + std::process::Command::new("git") + .args([ + "-C", + dir.path().to_str().unwrap(), + "config", + "core.autocrlf", + "true", + ]) + .status() + .unwrap(); + struct NoEnv; + impl Env for NoEnv { + fn get(&self, _k: &str) -> Option { + None + } + } + let e = choose_eol_for_new_file(dir.path(), Path::new("foo.txt"), &NoEnv); + assert_eq!(e, Eol::Crlf); + } + + #[test] + fn test_choose_eol_env_override_beats_config() { + let _g = TEST_MUTEX.lock().unwrap(); + let dir = tempdir().unwrap(); + std::process::Command::new("git") + .arg("init") + .arg("-q") + .current_dir(dir.path()) + .status() + .unwrap(); + std::process::Command::new("git") + .args([ + "-C", + dir.path().to_str().unwrap(), + "config", + "core.eol", + "crlf", + ]) + .status() + .unwrap(); + struct EnvLF; + impl Env for EnvLF { + fn get(&self, k: &str) -> Option { + (k == "APPLY_PATCH_ASSUME_EOL").then(|| "lf".to_string()) + } + } + let e = choose_eol_for_new_file(dir.path(), Path::new("foo.txt"), &EnvLF); + assert_eq!(e, Eol::Lf); + } + + #[test] + fn test_parse_git_check_attr_eol_stdout() { + assert!(matches!( + super::parse_git_check_attr_eol_stdout("foo.txt: eol: crlf\n"), + Some(Eol::Crlf) + )); + assert!(matches!( + 
super::parse_git_check_attr_eol_stdout("foo.txt: eol: lf\n"), + Some(Eol::Lf) + )); + assert!(super::parse_git_check_attr_eol_stdout("foo.txt: eol: unspecified\n").is_none()); + assert!(super::parse_git_check_attr_eol_stdout("foo.txt: eol: auto\n").is_none()); + } +} + +// Note: detection-from-buffer fallback for new files is implemented at the +// call site so it can incorporate local context (e.g., repo presence). diff --git a/codex-rs/apply-patch/src/lib.rs b/codex-rs/apply-patch/src/lib.rs index ac2f409791..c8bdd9d7fb 100644 --- a/codex-rs/apply-patch/src/lib.rs +++ b/codex-rs/apply-patch/src/lib.rs @@ -1,3 +1,4 @@ +mod eol; mod parser; mod seek_sequence; mod standalone_executable; @@ -583,11 +584,51 @@ fn apply_hunks_to_files(hunks: &[Hunk]) -> anyhow::Result { format!("Failed to create parent directories for {}", path.display()) })?; } - std::fs::write(path, contents) + // If a .gitattributes file is being modified, clear attr caches + if path.file_name().is_some_and(|n| n == ".gitattributes") + && let (Some(root), _) = repo_root_and_rel_for_path(path) + { + crate::eol::notify_gitattributes_touched(&root); + } + let (repo_root, rel) = repo_root_and_rel_for_path(path); + // New files: use deterministic policy: .gitattributes > core.eol > core.autocrlf > env > LF + let mut target = match repo_root.as_deref() { + Some(root) => { + let env = crate::eol::OsEnv; + let rel_ref = rel.as_deref().unwrap_or(path); + eol::choose_eol_for_new_file(root, rel_ref, &env) + } + None => eol::Eol::Lf, + }; + // Allow explicit Detect override via CLI/env only. 
+ use crate::eol::AssumeEol; + use crate::eol::Eol; + use crate::eol::get_assume_eol; + if matches!(get_assume_eol(), AssumeEol::Detect) { + let det = eol::detect_eol_from_bytes(contents.as_bytes()); + if matches!(det, Eol::Lf | Eol::Crlf) { + target = det; + } + } + let final_contents = match target { + eol::Eol::Crlf => { + eol::normalize_to_eol_preserve_eof(contents.clone(), eol::Eol::Crlf) + } + eol::Eol::Lf => { + eol::normalize_to_eol_preserve_eof(contents.clone(), eol::Eol::Lf) + } + eol::Eol::Unknown => contents.clone(), + }; + std::fs::write(path, final_contents) .with_context(|| format!("Failed to write file {}", path.display()))?; added.push(path.clone()); } Hunk::DeleteFile { path } => { + if path.file_name().is_some_and(|n| n == ".gitattributes") + && let (Some(root), _) = repo_root_and_rel_for_path(path) + { + crate::eol::notify_gitattributes_touched(&root); + } std::fs::remove_file(path) .with_context(|| format!("Failed to delete file {}", path.display()))?; deleted.push(path.clone()); @@ -597,8 +638,57 @@ fn apply_hunks_to_files(hunks: &[Hunk]) -> anyhow::Result { move_path, chunks, } => { - let AppliedPatch { new_contents, .. 
} = - derive_new_contents_from_chunks(path, chunks)?; + if path.file_name().is_some_and(|n| n == ".gitattributes") + && let (Some(root), _) = repo_root_and_rel_for_path(path) + { + crate::eol::notify_gitattributes_touched(&root); + } + let AppliedPatch { + original_contents, + new_contents, + } = derive_new_contents_from_chunks(path, chunks)?; + let (repo_root, rel) = repo_root_and_rel_for_path(path); + let final_contents = { + use crate::eol::AssumeEol; + use crate::eol::Eol; + match crate::eol::get_assume_eol() { + AssumeEol::Lf => { + crate::eol::normalize_to_eol_preserve_eof(new_contents.clone(), Eol::Lf) + } + AssumeEol::Crlf => crate::eol::normalize_to_eol_preserve_eof( + new_contents.clone(), + Eol::Crlf, + ), + AssumeEol::Git => { + let decided = + crate::eol::decide_eol(repo_root.as_deref(), rel.as_deref(), false); + match decided { + Eol::Lf => crate::eol::normalize_to_eol_preserve_eof( + new_contents.clone(), + Eol::Lf, + ), + Eol::Crlf => crate::eol::normalize_to_eol_preserve_eof( + new_contents.clone(), + Eol::Crlf, + ), + Eol::Unknown => new_contents.clone(), + } + } + AssumeEol::Detect | AssumeEol::Unspecified => { + match crate::eol::detect_eol_from_bytes(original_contents.as_bytes()) { + Eol::Lf => crate::eol::normalize_to_eol_preserve_eof( + new_contents.clone(), + Eol::Lf, + ), + Eol::Crlf => crate::eol::normalize_to_eol_preserve_eof( + new_contents.clone(), + Eol::Crlf, + ), + Eol::Unknown => new_contents.clone(), + } + } + } + }; if let Some(dest) = move_path { if let Some(parent) = dest.parent() && !parent.as_os_str().is_empty() @@ -607,13 +697,13 @@ fn apply_hunks_to_files(hunks: &[Hunk]) -> anyhow::Result { format!("Failed to create parent directories for {}", dest.display()) })?; } - std::fs::write(dest, new_contents) + std::fs::write(dest, final_contents) .with_context(|| format!("Failed to write file {}", dest.display()))?; std::fs::remove_file(path) .with_context(|| format!("Failed to remove original {}", path.display()))?; 
modified.push(dest.clone()); } else { - std::fs::write(path, new_contents) + std::fs::write(path, final_contents) .with_context(|| format!("Failed to write file {}", path.display()))?; modified.push(path.clone()); } @@ -669,6 +759,55 @@ fn derive_new_contents_from_chunks( }) } +// Helper: compute repo root and repo-relative path for a given file path, if any +fn repo_root_and_rel_for_path(path: &Path) -> (Option, Option) { + let dir = path.parent().unwrap_or_else(|| Path::new(".")); + let out = std::process::Command::new("git") + .arg("-C") + .arg(dir) + .arg("rev-parse") + .arg("--show-toplevel") + .output(); + if let Ok(out) = out + && out.status.success() + { + let root = PathBuf::from(String::from_utf8_lossy(&out.stdout).trim().to_string()); + // Compute a repo-relative path. If `path` is relative, resolve it under the repo root. + let abs_path = if path.is_absolute() { + path.to_path_buf() + } else { + root.join(path) + }; + // Be resilient on Windows where case / separators may differ + if let Some(rel) = rel_path_case_insensitive(&root, &abs_path) { + return (Some(root), Some(rel)); + } + return (Some(root), None); + } + (None, None) +} + +#[cfg(windows)] +fn rel_path_case_insensitive(root: &Path, path: &Path) -> Option { + let r = root.to_string_lossy().replace('\\', "/"); + let p = path.to_string_lossy().replace('\\', "/"); + let r_lower = r.to_ascii_lowercase(); + let p_lower = p.to_ascii_lowercase(); + if let Some(_rest) = p_lower.strip_prefix(&(r_lower.clone() + "/")) { + let start = r.len() + 1; // include '/' + return Some(PathBuf::from(&p[start..])); + } + if p_lower == r_lower { + return Some(PathBuf::from(".")); + } + None +} + +#[cfg(not(windows))] +fn rel_path_case_insensitive(root: &Path, path: &Path) -> Option { + path.strip_prefix(root).ok().map(PathBuf::from) +} + /// Compute a list of replacements needed to transform `original_lines` into the /// new lines, given the patch `chunks`. 
Each replacement is returned as /// `(start_index, old_len, new_lines)`. @@ -840,6 +979,22 @@ pub fn print_summary( Ok(()) } +// Line ending handling moved to `eol` module. + +// Legacy helper `detect_eol` removed; use `eol::detect_eol_from_bytes` when needed. + +// `detect_eol_from_bytes` now lives in `eol`. + +// `normalize_to_eol_preserve_eof` now lives in `eol`. + +// AssumeEol parsing and state now live in `eol`. + +// git_check_attr helper removed; attribute parsing handled in `eol::git_check_attr_eol`. + +// repo_root_from_cwd removed; we compute repo root relative to each path. + +// legacy_decide_eol_do_not_use removed. + #[cfg(test)] mod tests { use super::*; @@ -1092,7 +1247,9 @@ PATCH"#, assert_eq!(stdout_str, expected_out); assert_eq!(stderr_str, ""); let contents = fs::read_to_string(path).unwrap(); - assert_eq!(contents, "ab\ncd\n"); + // New EOL policy may write OS-native EOLs for non-git dirs; normalize for assertion + let normalized = contents.replace("\r\n", "\n"); + assert_eq!(normalized, "ab\ncd\n"); } #[test] @@ -1338,6 +1495,56 @@ PATCH"#, assert_eq!(String::from_utf8(stderr).unwrap(), ""); } + #[test] + fn test_update_preserves_crlf_line_endings() { + let dir = tempdir().unwrap(); + let path = dir.path().join("eol_crlf.txt"); + // Original uses CRLF endings + std::fs::write(&path, "line1\r\nline2\r\n").unwrap(); + + let patch = wrap_patch(&format!( + r#"*** Update File: {} +@@ + line1 +-line2 ++line2-replacement ++added"#, + path.display() + )); + + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + apply_patch(&patch, &mut stdout, &mut stderr).unwrap(); + + let contents = std::fs::read_to_string(&path).unwrap(); + // All lines should use CRLF after update + assert_eq!(contents, "line1\r\nline2-replacement\r\nadded\r\n"); + } + + #[test] + fn test_update_preserves_lf_line_endings() { + let dir = tempdir().unwrap(); + let path = dir.path().join("eol_lf.txt"); + std::fs::write(&path, "line1\nline2\n").unwrap(); + + let patch = 
wrap_patch(&format!( + r#"*** Update File: {} +@@ + line1 +-line2 ++line2-replacement ++added"#, + path.display() + )); + + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + apply_patch(&patch, &mut stdout, &mut stderr).unwrap(); + + let contents = std::fs::read_to_string(&path).unwrap(); + assert_eq!(contents, "line1\nline2-replacement\nadded\n"); + } + #[test] fn test_unified_diff() { // Start with a file containing four lines. @@ -1451,6 +1658,248 @@ PATCH"#, assert_eq!(expected, diff); } + #[test] + fn test_detect_eol_from_bytes_variants() { + use crate::eol::Eol; + use crate::eol::detect_eol_from_bytes; + assert_eq!(detect_eol_from_bytes(b"no newlines"), Eol::Unknown); + assert_eq!(detect_eol_from_bytes(b"a\n"), Eol::Lf); + assert_eq!(detect_eol_from_bytes(b"a\r\n"), Eol::Crlf); + assert_eq!(detect_eol_from_bytes(b"a\r\n b\n c\r\n"), Eol::Crlf,); + assert_eq!(detect_eol_from_bytes(b"a\n b\r\n c\n"), Eol::Lf); + } + + #[test] + fn test_normalize_to_eol_preserve_eof() { + use crate::eol::Eol; + use crate::eol::normalize_to_eol_preserve_eof; + // Preserve EOF newline presence when converting + let s = String::from("a\nb\n"); + let out = normalize_to_eol_preserve_eof(s, Eol::Crlf); + assert_eq!(out, "a\r\nb\r\n"); + + let s2 = String::from("a\nb"); // no trailing newline + let out2 = normalize_to_eol_preserve_eof(s2, Eol::Crlf); + assert_eq!(out2, "a\r\nb"); + + // Round-trip CRLF -> LF retains EOF newline + let s3 = String::from("x\r\ny\r\n"); + let out3 = normalize_to_eol_preserve_eof(s3, Eol::Lf); + assert_eq!(out3, "x\ny\n"); + } + + #[test] + fn test_new_file_eol_from_gitattributes_crlf() { + let dir = tempdir().unwrap(); + std::env::set_current_dir(dir.path()).unwrap(); + // init repo and write .gitattributes + std::process::Command::new("git") + .arg("init") + .arg("-q") + .status() + .unwrap(); + fs::write(dir.path().join(".gitattributes"), "*.txt text eol=crlf\n").unwrap(); + + let rel = Path::new("foo.txt"); + let path = dir.path().join(rel); 
+ let patch = wrap_patch(&format!("*** Add File: {}\n+line1\n+line2", rel.display())); + use assert_cmd::prelude::*; + use std::process::Command as PCommand; + let fake_global = dir.path().join("_gitconfig_global"); + fs::write(&fake_global, "").unwrap(); + let mut cmd = PCommand::cargo_bin("apply_patch").unwrap(); + cmd.current_dir(dir.path()) + // Ensure no leaked override from other tests affects EOL selection. + .env_remove("APPLY_PATCH_ASSUME_EOL") + .env("GIT_CONFIG_GLOBAL", &fake_global) + .env("GIT_CONFIG_NOSYSTEM", "1") + .arg(&patch) + .assert() + .success(); + let contents = fs::read(&path).unwrap(); + assert!(contents.windows(2).any(|w| w == b"\r\n")); + } + + #[test] + fn test_new_file_eol_from_gitattributes_lf() { + let dir = tempdir().unwrap(); + std::env::set_current_dir(dir.path()).unwrap(); + // init repo and write .gitattributes + std::process::Command::new("git") + .arg("init") + .arg("-q") + .status() + .unwrap(); + fs::write(dir.path().join(".gitattributes"), "*.txt text eol=lf\n").unwrap(); + + let rel = Path::new("bar.txt"); + let path = dir.path().join(rel); + let patch = wrap_patch(&format!("*** Add File: {}\n+line1\n+line2", rel.display())); + use assert_cmd::prelude::*; + use std::process::Command as PCommand; + let fake_global = dir.path().join("_gitconfig_global"); + fs::write(&fake_global, "").unwrap(); + let mut cmd = PCommand::cargo_bin("apply_patch").unwrap(); + cmd.current_dir(dir.path()) + // Ensure no leaked override from other tests affects EOL selection. 
+ .env_remove("APPLY_PATCH_ASSUME_EOL") + .env("GIT_CONFIG_GLOBAL", &fake_global) + .env("GIT_CONFIG_NOSYSTEM", "1") + .arg(&patch) + .assert() + .success(); + let contents = fs::read(&path).unwrap(); + // Should be LF only + assert!(!contents.windows(2).any(|w| w == b"\r\n")); + assert!(contents.contains(&b'\n')); + } + + #[test] + fn test_new_file_eol_when_attr_missing_defaults_to_lf() { + use assert_cmd::prelude::*; + use std::process::Command as PCommand; + let dir = tempdir().unwrap(); + // No .gitattributes + let path = dir.path().join("noattr.txt"); + let patch = wrap_patch(&format!("*** Add File: {}\n+hello\n+world", path.display())); + PCommand::cargo_bin("apply_patch") + .unwrap() + .current_dir(dir.path()) + // Ensure no leaked override from other tests affects EOL selection. + .env_remove("APPLY_PATCH_ASSUME_EOL") + .arg(&patch) + .assert() + .success(); + let contents = std::fs::read(&path).unwrap(); + assert!(!contents.windows(2).any(|w| w == b"\r\n")); + assert!(contents.contains(&b'\n')); + } + + // core.autocrlf precedence is environment dependent (can be masked by + // user/system core.eol). We validate core.eol directly and .gitattributes above. 
+ + #[test] + fn test_new_file_uses_core_eol_when_no_attrs() { + let dir = tempdir().unwrap(); + std::env::set_current_dir(dir.path()).unwrap(); + std::process::Command::new("git") + .arg("init") + .arg("-q") + .status() + .unwrap(); + std::process::Command::new("git") + .args(["config", "core.eol", "crlf"]) + .status() + .unwrap(); + + let path = dir.path().join("baz.txt"); + let patch = wrap_patch(&format!("*** Add File: {}\n+line1\n+line2", path.display())); + // Use subprocess CLI to avoid concurrent git initialization quirks on Windows + use assert_cmd::prelude::*; + use std::process::Command as PCommand; + let fake_global = dir.path().join("_gitconfig_global"); + fs::write(&fake_global, "").unwrap(); + PCommand::cargo_bin("apply_patch") + .unwrap() + .current_dir(dir.path()) + .env("GIT_CONFIG_GLOBAL", &fake_global) + .env("GIT_CONFIG_NOSYSTEM", "1") + .arg(&patch) + .assert() + .success(); + let contents = fs::read(&path).unwrap(); + // With new deterministic policy, respect core.eol=crlf when no .gitattributes + assert!(contents.windows(2).any(|w| w == b"\r\n")); + } + + #[test] + fn test_new_file_eol_from_env_override_lf_no_repo() { + // Use subprocess CLI to avoid cross-test global state. + use assert_cmd::prelude::*; + use std::process::Command as PCommand; + let dir = tempdir().unwrap(); + let path = dir.path().join("env_lf.txt"); + let patch = wrap_patch(&format!("*** Add File: {}\n+hello\n+world", path.display())); + let mut cmd = PCommand::cargo_bin("apply_patch").unwrap(); + let assert = cmd.arg("--assume-eol=lf").arg(patch).assert(); + assert.success(); + let contents = fs::read(&path).unwrap(); + assert!(!contents.windows(2).any(|w| w == b"\r\n")); + } + + #[test] + fn test_new_file_eol_detect_from_patch_no_repo() { + // When not in a git repo and without overrides, prefer EOL used in the + // patch buffer as a last fallback. 
+ use assert_cmd::prelude::*; + use std::process::Command as PCommand; + let dir = tempdir().unwrap(); + let path = dir.path().join("detect.txt"); + // Patch content uses LF line endings + let patch = wrap_patch(&format!("*** Add File: {}\n+hello\n+world", path.display())); + let mut cmd = PCommand::cargo_bin("apply_patch").unwrap(); + cmd.current_dir(dir.path()).arg(&patch).assert().success(); + let contents = std::fs::read(&path).unwrap(); + // Ensure file contains LF and does not contain CRLF even on Windows + assert!(contents.contains(&b'\n')); + assert!(!contents.windows(2).any(|w| w == b"\r\n")); + } + + #[test] + fn test_update_preserves_majority_crlf_on_mixed_file() { + let dir = tempdir().unwrap(); + let path = dir.path().join("mixed.txt"); + // Two CRLF, one LF + fs::write(&path, b"a\r\nb\r\nc\n").unwrap(); + let patch = wrap_patch(&format!( + r#"*** Update File: {} +@@ + a +-b ++B +"#, + path.display() + )); + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + apply_patch(&patch, &mut stdout, &mut stderr).unwrap(); + let contents = fs::read(&path).unwrap(); + // All lines should now be CRLF + assert!(contents.windows(2).all(|w| w != b"\n\n")); + assert!(contents.windows(2).any(|w| w == b"\r\n")); + } + + #[test] + fn test_binary_attribute_skips_normalization() { + let dir = tempdir().unwrap(); + std::env::set_current_dir(dir.path()).unwrap(); + std::process::Command::new("git") + .arg("init") + .arg("-q") + .status() + .unwrap(); + fs::write(dir.path().join(".gitattributes"), "*.bin binary\n").unwrap(); + + let path = dir.path().join("data.bin"); + let patch = wrap_patch(&format!("*** Add File: {}\n+aa\n+bb", path.display())); + // Use subprocess CLI to avoid cross-test global state and ensure git context + use assert_cmd::prelude::*; + use std::process::Command as PCommand; + let fake_global = dir.path().join("_gitconfig_global"); + fs::write(&fake_global, "").unwrap(); + PCommand::cargo_bin("apply_patch") + .unwrap() + 
.current_dir(dir.path()) + .env("GIT_CONFIG_GLOBAL", &fake_global) + .env("GIT_CONFIG_NOSYSTEM", "1") + .arg(&patch) + .assert() + .success(); + let contents = fs::read(&path).unwrap(); + // Keep as-is (LF) despite any git config + assert!(!contents.windows(2).any(|w| w == b"\r\n")); + } + #[test] fn test_unified_diff_insert_at_eof() { // Insert a new line at end‑of‑file. diff --git a/codex-rs/apply-patch/src/standalone_executable.rs b/codex-rs/apply-patch/src/standalone_executable.rs index ba31465c8d..1d8b630610 100644 --- a/codex-rs/apply-patch/src/standalone_executable.rs +++ b/codex-rs/apply-patch/src/standalone_executable.rs @@ -1,3 +1,4 @@ +use clap::Parser; use std::io::Read; use std::io::Write; @@ -9,25 +10,30 @@ pub fn main() -> ! { /// We would prefer to return `std::process::ExitCode`, but its `exit_process()` /// method is still a nightly API and we want main() to return !. pub fn run_main() -> i32 { - // Expect either one argument (the full apply_patch payload) or read it from stdin. - let mut args = std::env::args_os(); - let _argv0 = args.next(); + let cli = Cli::parse(); - let patch_arg = match args.next() { - Some(arg) => match arg.into_string() { - Ok(s) => s, - Err(_) => { - eprintln!("Error: apply_patch requires a UTF-8 PATCH argument."); - return 1; + // CLI overrides env; if not provided, respect env via default inside eol module + if let Some(val) = cli.assume_eol.as_deref() { + match crate::eol::parse_assume_eol(val) { + Some(sel) => crate::eol::set_assume_eol(sel), + None => { + eprintln!("Error: invalid --assume-eol value: {val}"); + return 2; } - }, + } + } + + let patch_arg = match cli.patch { + Some(s) => s, None => { - // No argument provided; attempt to read the patch from stdin. + // No positional provided; attempt to read the patch from stdin. 
let mut buf = String::new(); match std::io::stdin().read_to_string(&mut buf) { Ok(_) => { if buf.is_empty() { - eprintln!("Usage: apply_patch 'PATCH'\n echo 'PATCH' | apply-patch"); + eprintln!( + "Usage: apply_patch [-E|--assume-eol=lf|crlf|git|detect] 'PATCH'\n echo 'PATCH' | apply_patch" + ); return 2; } buf @@ -40,12 +46,6 @@ pub fn run_main() -> i32 { } }; - // Refuse extra args to avoid ambiguity. - if args.next().is_some() { - eprintln!("Error: apply_patch accepts exactly one argument."); - return 2; - } - let mut stdout = std::io::stdout(); let mut stderr = std::io::stderr(); match crate::apply_patch(&patch_arg, &mut stdout, &mut stderr) { @@ -57,3 +57,19 @@ pub fn run_main() -> i32 { Err(_) => 1, } } + +#[derive(Parser, Debug)] +#[command( + author, + version, + about = "Apply a simple patch format to the filesystem", + disable_help_subcommand = true +)] +struct Cli { + /// Assume EOL policy for writes: lf|crlf|git|detect (CLI overrides env) + #[arg(short = 'E', long = "assume-eol", value_name = "MODE")] + assume_eol: Option, + + /// The raw patch body; if omitted, reads from stdin + patch: Option, +} diff --git a/codex-rs/apply-patch/tests/suite/cli.rs b/codex-rs/apply-patch/tests/suite/cli.rs index ed95aba17c..46536be760 100644 --- a/codex-rs/apply-patch/tests/suite/cli.rs +++ b/codex-rs/apply-patch/tests/suite/cli.rs @@ -23,7 +23,10 @@ fn test_apply_patch_cli_add_and_update() -> anyhow::Result<()> { .assert() .success() .stdout(format!("Success. Updated the following files:\nA {file}\n")); - assert_eq!(fs::read_to_string(&absolute_path)?, "hello\n"); + { + let s = fs::read_to_string(&absolute_path)?; + assert_eq!(s.replace("\r\n", "\n"), "hello\n"); + } // 2) Update the file let update_patch = format!( @@ -41,7 +44,10 @@ fn test_apply_patch_cli_add_and_update() -> anyhow::Result<()> { .assert() .success() .stdout(format!("Success. 
Updated the following files:\nM {file}\n")); - assert_eq!(fs::read_to_string(&absolute_path)?, "world\n"); + { + let s = fs::read_to_string(&absolute_path)?; + assert_eq!(s.replace("\r\n", "\n"), "world\n"); + } Ok(()) } @@ -66,7 +72,10 @@ fn test_apply_patch_cli_stdin_add_and_update() -> anyhow::Result<()> { .assert() .success() .stdout(format!("Success. Updated the following files:\nA {file}\n")); - assert_eq!(fs::read_to_string(&absolute_path)?, "hello\n"); + { + let s = fs::read_to_string(&absolute_path)?; + assert_eq!(s.replace("\r\n", "\n"), "hello\n"); + } // 2) Update the file via stdin let update_patch = format!( @@ -84,7 +93,68 @@ fn test_apply_patch_cli_stdin_add_and_update() -> anyhow::Result<()> { .assert() .success() .stdout(format!("Success. Updated the following files:\nM {file}\n")); - assert_eq!(fs::read_to_string(&absolute_path)?, "world\n"); + { + let s = fs::read_to_string(&absolute_path)?; + assert_eq!(s.replace("\r\n", "\n"), "world\n"); + } Ok(()) } + +#[test] +fn test_detect_overrides_repo_policy() -> anyhow::Result<()> { + let tmp = tempdir()?; + // Initialize a repo with CRLF policy via .gitattributes + std::process::Command::new("git") + .arg("init") + .arg("-q") + .current_dir(tmp.path()) + .status()?; + std::fs::write(tmp.path().join(".gitattributes"), "*.txt text eol=crlf\n")?; + + let file = "detect_overrides.txt"; + let absolute_path = tmp.path().join(file); + // LF patch content + let add_patch = format!( + r#"*** Begin Patch +*** Add File: {file} ++hello +*** End Patch"# + ); + // CLI Detect should override repo policy and keep LF + assert_cmd::Command::cargo_bin("apply_patch")? 
+ .current_dir(tmp.path()) + .arg("--assume-eol=detect") + .arg(add_patch) + .assert() + .success(); + let s = std::fs::read_to_string(&absolute_path)?; + assert_eq!(s.replace("\r\n", "\n"), "hello\n"); + Ok(()) +} + +#[test] +fn test_cli_overrides_env_assume_eol() -> anyhow::Result<()> { + let tmp = tempdir()?; + let file = "env_cli_precedence.txt"; + let absolute_path = tmp.path().join(file); + + // Env says CRLF, CLI says LF. CLI should win. + let add_patch = format!( + r#"*** Begin Patch +*** Add File: {file} ++hello +*** End Patch"# + ); + Command::cargo_bin("apply_patch") + .expect("should find apply_patch binary") + .current_dir(tmp.path()) + .env("APPLY_PATCH_ASSUME_EOL", "crlf") + .arg("--assume-eol=lf") + .arg(add_patch) + .assert() + .success(); + let s = fs::read_to_string(&absolute_path)?; + assert_eq!(s.replace("\r\n", "\n"), "hello\n"); + Ok(()) +} diff --git a/codex-rs/core/tests/common/lib.rs b/codex-rs/core/tests/common/lib.rs index 3f75ed1819..a0d3dcfe12 100644 --- a/codex-rs/core/tests/common/lib.rs +++ b/codex-rs/core/tests/common/lib.rs @@ -1,362 +1,368 @@ -#![expect(clippy::expect_used)] - -use tempfile::TempDir; - -use codex_core::CodexConversation; -use codex_core::config::Config; -use codex_core::config::ConfigOverrides; -use codex_core::config::ConfigToml; -use regex_lite::Regex; - -#[cfg(target_os = "linux")] -use assert_cmd::cargo::cargo_bin; - -pub mod responses; -pub mod test_codex; -pub mod test_codex_exec; - -#[track_caller] -pub fn assert_regex_match<'s>(pattern: &str, actual: &'s str) -> regex_lite::Captures<'s> { - let regex = Regex::new(pattern).unwrap_or_else(|err| { - panic!("failed to compile regex {pattern:?}: {err}"); - }); - regex - .captures(actual) - .unwrap_or_else(|| panic!("regex {pattern:?} did not match {actual:?}")) -} - -/// Returns a default `Config` whose on-disk state is confined to the provided -/// temporary directory. 
Using a per-test directory keeps tests hermetic and -/// avoids clobbering a developer’s real `~/.codex`. -pub fn load_default_config_for_test(codex_home: &TempDir) -> Config { - Config::load_from_base_config_with_overrides( - ConfigToml::default(), - default_test_overrides(), - codex_home.path().to_path_buf(), - ) - .expect("defaults for test should always succeed") -} - -#[cfg(target_os = "linux")] -fn default_test_overrides() -> ConfigOverrides { - ConfigOverrides { - codex_linux_sandbox_exe: Some(cargo_bin("codex-linux-sandbox")), - ..ConfigOverrides::default() - } -} - -#[cfg(not(target_os = "linux"))] -fn default_test_overrides() -> ConfigOverrides { - ConfigOverrides::default() -} - -/// Builds an SSE stream body from a JSON fixture. -/// -/// The fixture must contain an array of objects where each object represents a -/// single SSE event with at least a `type` field matching the `event:` value. -/// Additional fields become the JSON payload for the `data:` line. An object -/// with only a `type` field results in an event with no `data:` section. This -/// makes it trivial to extend the fixtures as OpenAI adds new event kinds or -/// fields. 
-pub fn load_sse_fixture(path: impl AsRef) -> String { - let events: Vec = - serde_json::from_reader(std::fs::File::open(path).expect("read fixture")) - .expect("parse JSON fixture"); - events - .into_iter() - .map(|e| { - let kind = e - .get("type") - .and_then(|v| v.as_str()) - .expect("fixture event missing type"); - if e.as_object().map(|o| o.len() == 1).unwrap_or(false) { - format!("event: {kind}\n\n") - } else { - format!("event: {kind}\ndata: {e}\n\n") - } - }) - .collect() -} - -pub fn load_sse_fixture_with_id_from_str(raw: &str, id: &str) -> String { - let replaced = raw.replace("__ID__", id); - let events: Vec = - serde_json::from_str(&replaced).expect("parse JSON fixture"); - events - .into_iter() - .map(|e| { - let kind = e - .get("type") - .and_then(|v| v.as_str()) - .expect("fixture event missing type"); - if e.as_object().map(|o| o.len() == 1).unwrap_or(false) { - format!("event: {kind}\n\n") - } else { - format!("event: {kind}\ndata: {e}\n\n") - } - }) - .collect() -} - -/// Same as [`load_sse_fixture`], but replaces the placeholder `__ID__` in the -/// fixture template with the supplied identifier before parsing. This lets a -/// single JSON template be reused by multiple tests that each need a unique -/// `response_id`. 
-pub fn load_sse_fixture_with_id(path: impl AsRef, id: &str) -> String { - let raw = std::fs::read_to_string(path).expect("read fixture template"); - let replaced = raw.replace("__ID__", id); - let events: Vec = - serde_json::from_str(&replaced).expect("parse JSON fixture"); - events - .into_iter() - .map(|e| { - let kind = e - .get("type") - .and_then(|v| v.as_str()) - .expect("fixture event missing type"); - if e.as_object().map(|o| o.len() == 1).unwrap_or(false) { - format!("event: {kind}\n\n") - } else { - format!("event: {kind}\ndata: {e}\n\n") - } - }) - .collect() -} - -pub async fn wait_for_event( - codex: &CodexConversation, - predicate: F, -) -> codex_core::protocol::EventMsg -where - F: FnMut(&codex_core::protocol::EventMsg) -> bool, -{ - use tokio::time::Duration; - wait_for_event_with_timeout(codex, predicate, Duration::from_secs(1)).await -} - -pub async fn wait_for_event_match(codex: &CodexConversation, matcher: F) -> T -where - F: Fn(&codex_core::protocol::EventMsg) -> Option, -{ - let ev = wait_for_event(codex, |ev| matcher(ev).is_some()).await; - matcher(&ev).unwrap() -} - +#![expect(clippy::expect_used)] + +use tempfile::TempDir; + +use codex_core::CodexConversation; +use codex_core::config::Config; +use codex_core::config::ConfigOverrides; +use codex_core::config::ConfigToml; +use regex_lite::Regex; + +#[cfg(target_os = "linux")] +use assert_cmd::cargo::cargo_bin; + +pub mod responses; +pub mod test_codex; +pub mod test_codex_exec; + +#[track_caller] +pub fn assert_regex_match<'s>(pattern: &str, actual: &'s str) -> regex_lite::Captures<'s> { + let regex = Regex::new(pattern).unwrap_or_else(|err| { + panic!("failed to compile regex {pattern:?}: {err}"); + }); + regex + .captures(actual) + .unwrap_or_else(|| panic!("regex {pattern:?} did not match {actual:?}")) +} + +/// Returns a default `Config` whose on-disk state is confined to the provided +/// temporary directory. 
Using a per-test directory keeps tests hermetic and +/// avoids clobbering a developer’s real `~/.codex`. +pub fn load_default_config_for_test(codex_home: &TempDir) -> Config { + Config::load_from_base_config_with_overrides( + ConfigToml::default(), + default_test_overrides(), + codex_home.path().to_path_buf(), + ) + .expect("defaults for test should always succeed") +} + +#[cfg(target_os = "linux")] +fn default_test_overrides() -> ConfigOverrides { + ConfigOverrides { + codex_linux_sandbox_exe: Some(cargo_bin("codex-linux-sandbox")), + ..ConfigOverrides::default() + } +} + +#[cfg(not(target_os = "linux"))] +fn default_test_overrides() -> ConfigOverrides { + ConfigOverrides::default() +} + +/// Builds an SSE stream body from a JSON fixture. +/// +/// The fixture must contain an array of objects where each object represents a +/// single SSE event with at least a `type` field matching the `event:` value. +/// Additional fields become the JSON payload for the `data:` line. An object +/// with only a `type` field results in an event with no `data:` section. This +/// makes it trivial to extend the fixtures as OpenAI adds new event kinds or +/// fields. 
+pub fn load_sse_fixture(path: impl AsRef) -> String { + let events: Vec = + serde_json::from_reader(std::fs::File::open(path).expect("read fixture")) + .expect("parse JSON fixture"); + events + .into_iter() + .map(|e| { + let kind = e + .get("type") + .and_then(|v| v.as_str()) + .expect("fixture event missing type"); + if e.as_object().map(|o| o.len() == 1).unwrap_or(false) { + format!("event: {kind}\n\n") + } else { + format!("event: {kind}\ndata: {e}\n\n") + } + }) + .collect() +} + +pub fn load_sse_fixture_with_id_from_str(raw: &str, id: &str) -> String { + let replaced = raw.replace("__ID__", id); + let events: Vec = + serde_json::from_str(&replaced).expect("parse JSON fixture"); + events + .into_iter() + .map(|e| { + let kind = e + .get("type") + .and_then(|v| v.as_str()) + .expect("fixture event missing type"); + if e.as_object().map(|o| o.len() == 1).unwrap_or(false) { + format!("event: {kind}\n\n") + } else { + format!("event: {kind}\ndata: {e}\n\n") + } + }) + .collect() +} + +/// Same as [`load_sse_fixture`], but replaces the placeholder `__ID__` in the +/// fixture template with the supplied identifier before parsing. This lets a +/// single JSON template be reused by multiple tests that each need a unique +/// `response_id`. 
+pub fn load_sse_fixture_with_id(path: impl AsRef, id: &str) -> String { + let raw = std::fs::read_to_string(path).expect("read fixture template"); + let replaced = raw.replace("__ID__", id); + let events: Vec = + serde_json::from_str(&replaced).expect("parse JSON fixture"); + events + .into_iter() + .map(|e| { + let kind = e + .get("type") + .and_then(|v| v.as_str()) + .expect("fixture event missing type"); + if e.as_object().map(|o| o.len() == 1).unwrap_or(false) { + format!("event: {kind}\n\n") + } else { + format!("event: {kind}\ndata: {e}\n\n") + } + }) + .collect() +} + +pub async fn wait_for_event( + codex: &CodexConversation, + predicate: F, +) -> codex_core::protocol::EventMsg +where + F: FnMut(&codex_core::protocol::EventMsg) -> bool, +{ + use tokio::time::Duration; + wait_for_event_with_timeout(codex, predicate, Duration::from_secs(1)).await +} + +pub async fn wait_for_event_match(codex: &CodexConversation, matcher: F) -> T +where + F: Fn(&codex_core::protocol::EventMsg) -> Option, +{ + let ev = wait_for_event(codex, |ev| matcher(ev).is_some()).await; + matcher(&ev).unwrap() +} + pub async fn wait_for_event_with_timeout( - codex: &CodexConversation, - mut predicate: F, - wait_time: tokio::time::Duration, -) -> codex_core::protocol::EventMsg -where - F: FnMut(&codex_core::protocol::EventMsg) -> bool, -{ + codex: &CodexConversation, + mut predicate: F, + wait_time: tokio::time::Duration, +) -> codex_core::protocol::EventMsg +where + F: FnMut(&codex_core::protocol::EventMsg) -> bool, +{ use tokio::time::Duration; use tokio::time::timeout; - loop { - // Allow a bit more time to accommodate async startup work (e.g. config IO, tool discovery) - let ev = timeout(wait_time.max(Duration::from_secs(5)), codex.next_event()) + // Windows ARM runners are slower to schedule async tasks; give them + // a larger floor to reduce spurious timeouts in CI. 
+ #[cfg(all(target_os = "windows", target_arch = "aarch64"))] + const MIN_WAIT: Duration = Duration::from_secs(15); + #[cfg(not(all(target_os = "windows", target_arch = "aarch64")))] + const MIN_WAIT: Duration = Duration::from_secs(5); + loop { + // Allow a bit more time to accommodate async startup work (e.g. config IO, tool discovery) + let ev = timeout(wait_time.max(MIN_WAIT), codex.next_event()) .await .expect("timeout waiting for event") .expect("stream ended unexpectedly"); - if predicate(&ev.msg) { - return ev.msg; - } - } -} - -pub fn sandbox_env_var() -> &'static str { - codex_core::spawn::CODEX_SANDBOX_ENV_VAR -} - -pub fn sandbox_network_env_var() -> &'static str { - codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR -} - -pub mod fs_wait { - use anyhow::Result; - use anyhow::anyhow; - use notify::RecursiveMode; - use notify::Watcher; - use std::path::Path; - use std::path::PathBuf; - use std::sync::mpsc; - use std::sync::mpsc::RecvTimeoutError; - use std::time::Duration; - use std::time::Instant; - use tokio::task; - use walkdir::WalkDir; - - pub async fn wait_for_path_exists( - path: impl Into, - timeout: Duration, - ) -> Result { - let path = path.into(); - task::spawn_blocking(move || wait_for_path_exists_blocking(path, timeout)).await? - } - - pub async fn wait_for_matching_file( - root: impl Into, - timeout: Duration, - predicate: impl FnMut(&Path) -> bool + Send + 'static, - ) -> Result { - let root = root.into(); - task::spawn_blocking(move || { - let mut predicate = predicate; - blocking_find_matching_file(root, timeout, &mut predicate) - }) - .await? 
- } - - fn wait_for_path_exists_blocking(path: PathBuf, timeout: Duration) -> Result { - if path.exists() { - return Ok(path); - } - - let watch_root = nearest_existing_ancestor(&path); - let (tx, rx) = mpsc::channel(); - let mut watcher = notify::recommended_watcher(move |res| { - let _ = tx.send(res); - })?; - watcher.watch(&watch_root, RecursiveMode::Recursive)?; - - let deadline = Instant::now() + timeout; - loop { - if path.exists() { - return Ok(path.clone()); - } - let now = Instant::now(); - if now >= deadline { - break; - } - let remaining = deadline.saturating_duration_since(now); - match rx.recv_timeout(remaining) { - Ok(Ok(_event)) => { - if path.exists() { - return Ok(path.clone()); - } - } - Ok(Err(err)) => return Err(err.into()), - Err(RecvTimeoutError::Timeout) => break, - Err(RecvTimeoutError::Disconnected) => break, - } - } - - if path.exists() { - Ok(path) - } else { - Err(anyhow!("timed out waiting for {path:?}")) - } - } - - fn blocking_find_matching_file( - root: PathBuf, - timeout: Duration, - predicate: &mut impl FnMut(&Path) -> bool, - ) -> Result { - let root = wait_for_path_exists_blocking(root, timeout)?; - - if let Some(found) = scan_for_match(&root, predicate) { - return Ok(found); - } - - let (tx, rx) = mpsc::channel(); - let mut watcher = notify::recommended_watcher(move |res| { - let _ = tx.send(res); - })?; - watcher.watch(&root, RecursiveMode::Recursive)?; - - let deadline = Instant::now() + timeout; - - while Instant::now() < deadline { - let remaining = deadline.saturating_duration_since(Instant::now()); - match rx.recv_timeout(remaining) { - Ok(Ok(_event)) => { - if let Some(found) = scan_for_match(&root, predicate) { - return Ok(found); - } - } - Ok(Err(err)) => return Err(err.into()), - Err(RecvTimeoutError::Timeout) => break, - Err(RecvTimeoutError::Disconnected) => break, - } - } - - if let Some(found) = scan_for_match(&root, predicate) { - Ok(found) - } else { - Err(anyhow!("timed out waiting for matching file in 
{root:?}")) - } - } - - fn scan_for_match(root: &Path, predicate: &mut impl FnMut(&Path) -> bool) -> Option { - for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) { - let path = entry.path(); - if !entry.file_type().is_file() { - continue; - } - if predicate(path) { - return Some(path.to_path_buf()); - } - } - None - } - - fn nearest_existing_ancestor(path: &Path) -> PathBuf { - let mut current = path; - loop { - if current.exists() { - return current.to_path_buf(); - } - match current.parent() { - Some(parent) => current = parent, - None => return PathBuf::from("."), - } - } - } -} - -#[macro_export] -macro_rules! skip_if_sandbox { - () => {{ - if ::std::env::var($crate::sandbox_env_var()) - == ::core::result::Result::Ok("seatbelt".to_string()) - { - eprintln!( - "{} is set to 'seatbelt', skipping test.", - $crate::sandbox_env_var() - ); - return; - } - }}; - ($return_value:expr $(,)?) => {{ - if ::std::env::var($crate::sandbox_env_var()) - == ::core::result::Result::Ok("seatbelt".to_string()) - { - eprintln!( - "{} is set to 'seatbelt', skipping test.", - $crate::sandbox_env_var() - ); - return $return_value; - } - }}; -} - -#[macro_export] -macro_rules! skip_if_no_network { - () => {{ - if ::std::env::var($crate::sandbox_network_env_var()).is_ok() { - println!( - "Skipping test because it cannot execute when network is disabled in a Codex sandbox." - ); - return; - } - }}; - ($return_value:expr $(,)?) => {{ - if ::std::env::var($crate::sandbox_network_env_var()).is_ok() { - println!( - "Skipping test because it cannot execute when network is disabled in a Codex sandbox." 
- ); - return $return_value; - } - }}; -} + if predicate(&ev.msg) { + return ev.msg; + } + } +} + +pub fn sandbox_env_var() -> &'static str { + codex_core::spawn::CODEX_SANDBOX_ENV_VAR +} + +pub fn sandbox_network_env_var() -> &'static str { + codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR +} + +pub mod fs_wait { + use anyhow::Result; + use anyhow::anyhow; + use notify::RecursiveMode; + use notify::Watcher; + use std::path::Path; + use std::path::PathBuf; + use std::sync::mpsc; + use std::sync::mpsc::RecvTimeoutError; + use std::time::Duration; + use std::time::Instant; + use tokio::task; + use walkdir::WalkDir; + + pub async fn wait_for_path_exists( + path: impl Into, + timeout: Duration, + ) -> Result { + let path = path.into(); + task::spawn_blocking(move || wait_for_path_exists_blocking(path, timeout)).await? + } + + pub async fn wait_for_matching_file( + root: impl Into, + timeout: Duration, + predicate: impl FnMut(&Path) -> bool + Send + 'static, + ) -> Result { + let root = root.into(); + task::spawn_blocking(move || { + let mut predicate = predicate; + blocking_find_matching_file(root, timeout, &mut predicate) + }) + .await? 
+ } + + fn wait_for_path_exists_blocking(path: PathBuf, timeout: Duration) -> Result { + if path.exists() { + return Ok(path); + } + + let watch_root = nearest_existing_ancestor(&path); + let (tx, rx) = mpsc::channel(); + let mut watcher = notify::recommended_watcher(move |res| { + let _ = tx.send(res); + })?; + watcher.watch(&watch_root, RecursiveMode::Recursive)?; + + let deadline = Instant::now() + timeout; + loop { + if path.exists() { + return Ok(path.clone()); + } + let now = Instant::now(); + if now >= deadline { + break; + } + let remaining = deadline.saturating_duration_since(now); + match rx.recv_timeout(remaining) { + Ok(Ok(_event)) => { + if path.exists() { + return Ok(path.clone()); + } + } + Ok(Err(err)) => return Err(err.into()), + Err(RecvTimeoutError::Timeout) => break, + Err(RecvTimeoutError::Disconnected) => break, + } + } + + if path.exists() { + Ok(path) + } else { + Err(anyhow!("timed out waiting for {path:?}")) + } + } + + fn blocking_find_matching_file( + root: PathBuf, + timeout: Duration, + predicate: &mut impl FnMut(&Path) -> bool, + ) -> Result { + let root = wait_for_path_exists_blocking(root, timeout)?; + + if let Some(found) = scan_for_match(&root, predicate) { + return Ok(found); + } + + let (tx, rx) = mpsc::channel(); + let mut watcher = notify::recommended_watcher(move |res| { + let _ = tx.send(res); + })?; + watcher.watch(&root, RecursiveMode::Recursive)?; + + let deadline = Instant::now() + timeout; + + while Instant::now() < deadline { + let remaining = deadline.saturating_duration_since(Instant::now()); + match rx.recv_timeout(remaining) { + Ok(Ok(_event)) => { + if let Some(found) = scan_for_match(&root, predicate) { + return Ok(found); + } + } + Ok(Err(err)) => return Err(err.into()), + Err(RecvTimeoutError::Timeout) => break, + Err(RecvTimeoutError::Disconnected) => break, + } + } + + if let Some(found) = scan_for_match(&root, predicate) { + Ok(found) + } else { + Err(anyhow!("timed out waiting for matching file in 
{root:?}")) + } + } + + fn scan_for_match(root: &Path, predicate: &mut impl FnMut(&Path) -> bool) -> Option { + for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) { + let path = entry.path(); + if !entry.file_type().is_file() { + continue; + } + if predicate(path) { + return Some(path.to_path_buf()); + } + } + None + } + + fn nearest_existing_ancestor(path: &Path) -> PathBuf { + let mut current = path; + loop { + if current.exists() { + return current.to_path_buf(); + } + match current.parent() { + Some(parent) => current = parent, + None => return PathBuf::from("."), + } + } + } +} + +#[macro_export] +macro_rules! skip_if_sandbox { + () => {{ + if ::std::env::var($crate::sandbox_env_var()) + == ::core::result::Result::Ok("seatbelt".to_string()) + { + eprintln!( + "{} is set to 'seatbelt', skipping test.", + $crate::sandbox_env_var() + ); + return; + } + }}; + ($return_value:expr $(,)?) => {{ + if ::std::env::var($crate::sandbox_env_var()) + == ::core::result::Result::Ok("seatbelt".to_string()) + { + eprintln!( + "{} is set to 'seatbelt', skipping test.", + $crate::sandbox_env_var() + ); + return $return_value; + } + }}; +} + +#[macro_export] +macro_rules! skip_if_no_network { + () => {{ + if ::std::env::var($crate::sandbox_network_env_var()).is_ok() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." + ); + return; + } + }}; + ($return_value:expr $(,)?) => {{ + if ::std::env::var($crate::sandbox_network_env_var()).is_ok() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." + ); + return $return_value; + } + }}; +}