From beadbd2a0c2a7fe69761a0e11bab2809aedbce0f Mon Sep 17 00:00:00 2001 From: DCjanus Date: Wed, 5 Nov 2025 01:10:18 +0800 Subject: [PATCH 1/4] feat: support copying from containers --- docs/features/files.md | 87 ++++++++++++ mkdocs.yml | 1 + testcontainers/src/core.rs | 5 +- testcontainers/src/core/client.rs | 97 +++++++++++-- .../src/core/containers/async_container.rs | 23 +++- .../core/containers/async_container/raw.rs | 16 +++ .../src/core/containers/sync_container.rs | 23 +++- testcontainers/src/core/copy.rs | 127 +++++++++++++++++- testcontainers/src/lib.rs | 5 +- testcontainers/tests/async_runner.rs | 40 ++++++ testcontainers/tests/sync_runner.rs | 34 +++++ 11 files changed, 435 insertions(+), 23 deletions(-) create mode 100644 docs/features/files.md diff --git a/docs/features/files.md b/docs/features/files.md new file mode 100644 index 00000000..ef6d7b36 --- /dev/null +++ b/docs/features/files.md @@ -0,0 +1,87 @@ +# Files and Mounts + +Rust Testcontainers lets you seed container filesystems before startup, collect artifacts produced inside containers, and bind host paths at runtime. The APIs deliver smooth ergonomics while staying idiomatic to Rust. + +## Copying Files Into Containers (Before Startup) + +Use `ImageExt::with_copy_to` to stage files or directories before the container starts. Content can come from raw bytes or host paths: + +```rust +// Example: copying inline bytes and directories into a container +use testcontainers::{GenericImage, WaitFor}; + +let project_assets = std::path::Path::new("tests/fixtures/assets"); +let image = GenericImage::new("alpine", "latest") + .with_wait_for(WaitFor::seconds(1)) + .with_copy_to("/opt/app/config.yaml", br#"mode = "test""#.to_vec()) + .with_copy_to("/opt/app/assets", project_assets); +``` + +Everything is packed into a TAR archive, preserving nested directories. The helper accepts either a `Vec<u8>` of raw bytes or any path-like value (such as `&Path` or `PathBuf`) that converts into `CopyDataSource`.
+Note: file permissions and symbolic links follow Docker’s default TAR handling. + +## Copying Files From Containers (After Execution) + +Use `copy_file_from` to pull data produced inside the container: + +```rust +// Example: copying a file from a running container to the host +use tempfile::tempdir; +use testcontainers::{GenericImage, WaitFor}; + +#[tokio::test] +async fn copy_example() -> anyhow::Result<()> { + let container = GenericImage::new("alpine", "latest") + .with_cmd(["sh", "-c", "echo '42' > /tmp/result.txt && sleep 10"]) + .with_wait_for(WaitFor::seconds(1)) + .start() + .await?; + + let destination = tempdir()?.path().join("result.txt"); + container + .copy_file_from("/tmp/result.txt", destination.as_path()) + .await?; + assert_eq!(tokio::fs::read_to_string(&destination).await?, "42\n"); + Ok(()) +} +``` + +- `copy_file_from` streams the sole regular-file entry produced by Docker into any destination implementing `CopyFileFromContainer` (for example `&Path`, `PathBuf`, `Vec`, or `&mut Vec`). + It verifies that **exactly one** file exists and returns an error (e.g., `CopyFileError::UnexpectedDirectory`) when the path resolves to a directory or an unsupported TAR record. +- To capture the contents in memory: + ```rust + let mut bytes = Vec::new(); + container.copy_file_from("/tmp/result.txt", &mut bytes).await?; + ``` + +The blocking `Container` type provides the same `copy_file_from` API. 
+ +## Using Mounts for Writable Workspaces + +When a bind or tmpfs mount fits better than copy semantics, use the `Mount` helpers: + +```rust +// Example: mounting a host directory for read/write access +use std::path::Path; +use testcontainers::core::{mounts::Mount, AccessMode, MountType}; + +let host_data = Path::new("/var/tmp/integration-data"); +let mount = Mount::bind(host_data, "/workspace") + .with_mode(AccessMode::ReadWrite) + .with_type(MountType::Bind); + +let image = GenericImage::new("python", "3.13") + .with_mount(mount) + .with_cmd(["python", "/workspace/run.py"]); +``` + +Bind mounts share host state directly. Tmpfs mounts create ephemeral in-memory storage useful for scratch data or caches. + +## Selecting an Approach + +- **Copy before startup** — for deterministic inputs. +- **Copy from containers** — to capture build artifacts, logs, or test fixtures produced during a run. +- **Use mounts** — when containers need to read/write large amounts of data efficiently without re-tarring. + +Mixing these tools keeps tests hermetic (isolated and reproducible) while letting you inspect outputs locally. +Document each choice in code so teammates know whether data is ephemeral (`tmpfs`), seeded once (`with_copy_to`), or captured for later assertions (`copy_file_from`). 
\ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 70976534..5fcabc41 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -44,6 +44,7 @@ nav: - features/configuration.md - features/wait_strategies.md - features/exec_commands.md + - features/files.md - features/networking.md - features/building_images.md - features/docker_compose.md diff --git a/testcontainers/src/core.rs b/testcontainers/src/core.rs index 78f6441a..c60224de 100644 --- a/testcontainers/src/core.rs +++ b/testcontainers/src/core.rs @@ -6,7 +6,10 @@ pub use self::{ buildable::BuildableImage, }, containers::*, - copy::{CopyDataSource, CopyToContainer, CopyToContainerCollection, CopyToContainerError}, + copy::{ + CopyDataSource, CopyFileFromContainer, CopyFromContainerError, CopyToContainer, + CopyToContainerCollection, CopyToContainerError, + }, healthcheck::Healthcheck, image::{ContainerState, ExecCommand, Image, ImageExt}, mounts::{AccessMode, Mount, MountTmpfsOptions, MountType}, diff --git a/testcontainers/src/core/client.rs b/testcontainers/src/core/client.rs index bc89e86c..7f4d2fe3 100644 --- a/testcontainers/src/core/client.rs +++ b/testcontainers/src/core/client.rs @@ -1,9 +1,4 @@ -use std::{ - collections::HashMap, - io::{self}, - str::FromStr, - sync::Arc, -}; +use std::{collections::HashMap, io, str::FromStr, sync::Arc}; use bollard::{ auth::DockerCredentials, @@ -17,21 +12,30 @@ use bollard::{ }, query_parameters::{ BuildImageOptionsBuilder, BuilderVersion, CreateContainerOptions, - CreateImageOptionsBuilder, InspectContainerOptions, InspectContainerOptionsBuilder, - InspectNetworkOptions, InspectNetworkOptionsBuilder, ListContainersOptionsBuilder, - ListNetworksOptions, LogsOptionsBuilder, RemoveContainerOptionsBuilder, - StartContainerOptions, StopContainerOptionsBuilder, UploadToContainerOptionsBuilder, + CreateImageOptionsBuilder, DownloadFromContainerOptionsBuilder, InspectContainerOptions, + InspectContainerOptionsBuilder, InspectNetworkOptions, 
InspectNetworkOptionsBuilder, + ListContainersOptionsBuilder, ListNetworksOptions, LogsOptionsBuilder, + RemoveContainerOptionsBuilder, StartContainerOptions, StopContainerOptionsBuilder, + UploadToContainerOptionsBuilder, }, Docker, }; use ferroid::{base32::Base32UlidExt, id::ULID}; -use futures::{StreamExt, TryStreamExt}; -use tokio::sync::{Mutex, OnceCell}; +use futures::{pin_mut, StreamExt, TryStreamExt}; +use tokio::{ + io::AsyncRead, + sync::{Mutex, OnceCell}, +}; +use tokio_tar::{Archive as AsyncTarArchive, EntryType}; +use tokio_util::io::StreamReader; use url::Url; use crate::core::{ client::exec::ExecResult, - copy::{CopyToContainer, CopyToContainerCollection, CopyToContainerError}, + copy::{ + CopyFileFromContainer, CopyFromContainerError, CopyToContainer, CopyToContainerCollection, + CopyToContainerError, + }, env::{self, ConfigurationError}, logs::{ stream::{LogStream, RawLogStream}, @@ -127,6 +131,8 @@ pub enum ClientError { UploadToContainerError(BollardError), #[error("failed to prepare data for copy-to-container: {0}")] CopyToContainerError(CopyToContainerError), + #[error("failed to handle data copied from container: {0}")] + CopyFromContainerError(CopyFromContainerError), } /// The internal client. 
@@ -404,6 +410,71 @@ impl Client { .map_err(ClientError::UploadToContainerError) } + pub(crate) async fn copy_file_from_container( + &self, + container_id: impl AsRef, + container_path: impl AsRef, + target: T, + ) -> Result + where + T: CopyFileFromContainer, + { + let container_id = container_id.as_ref(); + let options = DownloadFromContainerOptionsBuilder::new() + .path(container_path.as_ref()) + .build(); + + let stream = self + .bollard + .download_from_container(container_id, Some(options)) + .map_err(|err| io::Error::new(io::ErrorKind::Other, err)); + let reader = StreamReader::new(stream); + Self::extract_file_entry(reader, target) + .await + .map_err(ClientError::CopyFromContainerError) + } + + async fn extract_file_entry( + reader: R, + target: T, + ) -> Result + where + R: AsyncRead + Unpin, + T: CopyFileFromContainer, + { + let mut archive = AsyncTarArchive::new(reader); + let entries = archive.entries().map_err(CopyFromContainerError::Io)?; + + let files = + entries + .map_err(CopyFromContainerError::Io) + .try_filter_map(move |entry| async move { + match entry.header().entry_type() { + EntryType::GNULongName + | EntryType::GNULongLink + | EntryType::XGlobalHeader + | EntryType::XHeader + | EntryType::GNUSparse => Ok(None), // skip metadata entries + EntryType::Directory => Err(CopyFromContainerError::IsDirectory), + EntryType::Regular | EntryType::Continuous => return Ok(Some(entry)), + et @ _ => Err(CopyFromContainerError::UnsupportedEntry(et)), + } + }); + + pin_mut!(files); + + let first_file = files + .try_next() + .await? 
+ .ok_or(CopyFromContainerError::EmptyArchive)?; + + if files.try_next().await?.is_some() { + return Err(CopyFromContainerError::MultipleFilesInArchive); + } + + target.copy_from_reader(first_file).await + } + pub(crate) async fn container_is_running( &self, container_id: &str, diff --git a/testcontainers/src/core/containers/async_container.rs b/testcontainers/src/core/containers/async_container.rs index 06752e39..a6d7a5a5 100644 --- a/testcontainers/src/core/containers/async_container.rs +++ b/testcontainers/src/core/containers/async_container.rs @@ -5,7 +5,10 @@ use tokio_stream::StreamExt; #[cfg(feature = "host-port-exposure")] use super::host::HostPortExposure; use crate::{ - core::{async_drop, client::Client, env, error::Result, network::Network, ContainerState}, + core::{ + async_drop, client::Client, copy::CopyFileFromContainer, env, error::Result, + network::Network, ContainerState, + }, ContainerRequest, Image, }; @@ -179,6 +182,24 @@ where Ok(exit_code) } + /// Copies a single file from the container into an arbitrary target implementing [`CopyFileFromContainer`]. + /// + /// # Behavior + /// - Regular files are streamed directly into the target (e.g. `PathBuf`, `Vec`). + /// - If `container_path` resolves to a directory, an error is returned and no data is written. + /// - Symlink handling follows Docker's `GET /containers/{id}/archive` endpoint behavior without extra processing. + pub async fn copy_file_from( + &self, + container_path: impl Into, + target: T, + ) -> Result + where + T: CopyFileFromContainer, + { + let container_path = container_path.into(); + self.raw.copy_file_from(container_path, target).await + } + /// Removes the container. 
pub async fn rm(mut self) -> Result<()> { log::debug!("Deleting docker container {}", self.id()); diff --git a/testcontainers/src/core/containers/async_container/raw.rs b/testcontainers/src/core/containers/async_container/raw.rs index 8497c8dc..ef50389b 100644 --- a/testcontainers/src/core/containers/async_container/raw.rs +++ b/testcontainers/src/core/containers/async_container/raw.rs @@ -5,6 +5,7 @@ use tokio::io::{AsyncBufRead, AsyncReadExt}; use super::{exec, Client}; use crate::{ core::{ + copy::CopyFileFromContainer, error::{ContainerMissingInfo, ExecError, Result}, ports::Ports, wait::WaitStrategy, @@ -38,6 +39,21 @@ impl RawContainer { self.docker_client.ports(&self.id).await.map_err(Into::into) } + pub(crate) async fn copy_file_from( + &self, + container_path: impl Into, + target: T, + ) -> Result + where + T: CopyFileFromContainer, + { + let container_path = container_path.into(); + self.docker_client + .copy_file_from_container(self.id(), &container_path, target) + .await + .map_err(TestcontainersError::from) + } + /// Returns the mapped host port for an internal port of this docker container, on the host's /// IPv4 interfaces. /// diff --git a/testcontainers/src/core/containers/sync_container.rs b/testcontainers/src/core/containers/sync_container.rs index 0aa5abd0..121301c0 100644 --- a/testcontainers/src/core/containers/sync_container.rs +++ b/testcontainers/src/core/containers/sync_container.rs @@ -1,7 +1,9 @@ use std::{fmt, io::BufRead, net::IpAddr, sync::Arc}; use crate::{ - core::{env, error::Result, ports::Ports, ContainerPort, ExecCommand}, + core::{ + copy::CopyFileFromContainer, env, error::Result, ports::Ports, ContainerPort, ExecCommand, + }, ContainerAsync, Image, }; @@ -130,6 +132,25 @@ where }) } + /// Copies a single file from the container into an arbitrary target implementing [`CopyFileFromContainer`]. + /// + /// # Behavior + /// - Regular files are streamed directly into the target (e.g. `PathBuf`, `Vec`). 
+ /// - If `container_path` resolves to a directory, an error is returned and no data is written. + /// - Symlink handling follows Docker's `GET /containers/{id}/archive` endpoint behavior without extra processing. + pub fn copy_file_from( + &self, + container_path: impl Into, + target: T, + ) -> Result + where + T: CopyFileFromContainer, + { + let container_path = container_path.into(); + self.rt() + .block_on(self.async_impl().copy_file_from(container_path, target)) + } + /// Stops the container (not the same with `pause`) using the default 10 second timeout. pub fn stop(&self) -> Result<()> { self.rt().block_on(self.async_impl().stop()) diff --git a/testcontainers/src/core/copy.rs b/testcontainers/src/core/copy.rs index 39e34d4d..a0fc7f81 100644 --- a/testcontainers/src/core/copy.rs +++ b/testcontainers/src/core/copy.rs @@ -1,7 +1,8 @@ -use std::{ - io, - path::{Path, PathBuf}, -}; +use std::path::{Path, PathBuf}; + +use async_trait::async_trait; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWriteExt}; +use tokio_tar::EntryType; #[derive(Debug, Clone)] pub struct CopyToContainerCollection(Vec); @@ -18,10 +19,123 @@ pub enum CopyDataSource { Data(Vec), } +/// Errors that can occur while materializing data copied from a container. +#[derive(Debug, thiserror::Error)] +pub enum CopyFromContainerError { + #[error("io failed with error: {0}")] + Io(#[from] std::io::Error), + #[error("archive did not contain any regular files")] + EmptyArchive, + #[error("archive contained multiple files, but only one was expected")] + MultipleFilesInArchive, + #[error("requested container path is a directory")] + IsDirectory, + #[error("archive entry type '{0:?}' is not supported for requested target")] + UnsupportedEntry(EntryType), +} + +/// Abstraction for materializing the bytes read from a source into a concrete destination. 
+/// +/// Implementors typically persist the incoming bytes to disk or buffer them in memory and then +/// return a value that callers can work with (for example, the path that was written or the +/// collected bytes). Implementations must consume the provided reader until EOF or return an error. +#[async_trait(?Send)] +pub trait CopyFileFromContainer { + type Output; + + /// Writes all bytes from the reader into `self`, returning a value that represents the completed operation. + /// + /// Implementations may mutate `self` and must propagate I/O errors via [`CopyFromContainerError`]. + async fn copy_from_reader(self, reader: R) -> Result + where + R: AsyncRead + Unpin; +} + +#[async_trait(?Send)] +impl CopyFileFromContainer for Vec { + type Output = Vec; + + async fn copy_from_reader( + mut self, + reader: R, + ) -> Result + where + R: AsyncRead + Unpin, + { + let mut_ref = &mut self; + mut_ref.copy_from_reader(reader).await?; + Ok(self) + } +} + +#[async_trait(?Send)] +impl<'a> CopyFileFromContainer for &'a mut Vec { + type Output = (); + + async fn copy_from_reader( + mut self, + mut reader: R, + ) -> Result + where + R: AsyncRead + Unpin, + { + self.clear(); + reader + .read_to_end(&mut self) + .await + .map_err(CopyFromContainerError::Io)?; + Ok(()) + } +} + +#[async_trait(?Send)] +impl CopyFileFromContainer for PathBuf { + type Output = (); + + async fn copy_from_reader(self, reader: R) -> Result + where + R: AsyncRead + Unpin, + { + self.as_path().copy_from_reader(reader).await + } +} + +#[async_trait(?Send)] +impl CopyFileFromContainer for &Path { + type Output = (); + + async fn copy_from_reader( + self, + mut reader: R, + ) -> Result + where + R: AsyncRead + Unpin, + { + if let Some(parent) = self.parent() { + if !parent.as_os_str().is_empty() { + tokio::fs::create_dir_all(parent) + .await + .map_err(CopyFromContainerError::Io)?; + } + } + + let mut file = tokio::fs::File::create(self) + .await + .map_err(CopyFromContainerError::Io)?; + + 
tokio::io::copy(&mut reader, &mut file) + .await + .map_err(CopyFromContainerError::Io)?; + + file.flush().await.map_err(CopyFromContainerError::Io)?; + Ok(()) + } +} + #[derive(Debug, thiserror::Error)] pub enum CopyToContainerError { #[error("io failed with error: {0}")] - IoError(io::Error), + IoError(std::io::Error), #[error("failed to get the path name: {0}")] PathNameError(String), } @@ -85,6 +199,7 @@ impl From<&Path> for CopyDataSource { CopyDataSource::File(value.to_path_buf()) } } + impl From for CopyDataSource { fn from(value: PathBuf) -> Self { CopyDataSource::File(value) @@ -224,7 +339,7 @@ mod tests { assert!(result.is_err()); if let Err(CopyToContainerError::IoError(err)) = result { - assert_eq!(err.kind(), io::ErrorKind::NotFound); + assert_eq!(err.kind(), std::io::ErrorKind::NotFound); } else { panic!("Expected IoError"); } diff --git a/testcontainers/src/lib.rs b/testcontainers/src/lib.rs index b9c6556a..61b2c81c 100644 --- a/testcontainers/src/lib.rs +++ b/testcontainers/src/lib.rs @@ -90,7 +90,10 @@ pub use crate::core::Container; #[cfg(feature = "reusable-containers")] pub use crate::core::ReuseDirective; pub use crate::core::{ - copy::{CopyDataSource, CopyToContainer, CopyToContainerError}, + copy::{ + CopyDataSource, CopyFileFromContainer, CopyFromContainerError, CopyToContainer, + CopyToContainerError, + }, error::TestcontainersError, BuildableImage, ContainerAsync, ContainerRequest, Healthcheck, Image, ImageExt, }; diff --git a/testcontainers/tests/async_runner.rs b/testcontainers/tests/async_runner.rs index ae6baed3..34795777 100644 --- a/testcontainers/tests/async_runner.rs +++ b/testcontainers/tests/async_runner.rs @@ -292,6 +292,46 @@ async fn async_copy_files_to_container() -> anyhow::Result<()> { Ok(()) } +#[tokio::test] +async fn async_copy_file_from_container_to_path() -> anyhow::Result<()> { + let container = GenericImage::new("alpine", "latest") + .with_wait_for(WaitFor::seconds(1)) + .with_cmd(["sh", "-c", "echo '42' > 
/tmp/result.txt && sleep 10"]) + .start() + .await?; + + let destination_dir = tempfile::tempdir()?; + let destination = destination_dir.path().join("result.txt"); + + container + .copy_file_from("/tmp/result.txt", destination.as_path()) + .await?; + + let copied = tokio::fs::read_to_string(&destination).await?; + assert_eq!(copied, "42\n"); + + container.stop().await?; + Ok(()) +} + +#[tokio::test] +async fn async_copy_file_from_container_into_mut_vec() -> anyhow::Result<()> { + let container = GenericImage::new("alpine", "latest") + .with_wait_for(WaitFor::seconds(1)) + .with_cmd(["sh", "-c", "echo 'buffer' > /tmp/result.txt && sleep 10"]) + .start() + .await?; + + let mut buffer = Vec::new(); + container + .copy_file_from("/tmp/result.txt", &mut buffer) + .await?; + assert_eq!(buffer, b"buffer\n"); + + container.stop().await?; + Ok(()) +} + #[tokio::test] async fn async_container_is_running() -> anyhow::Result<()> { let _ = pretty_env_logger::try_init(); diff --git a/testcontainers/tests/sync_runner.rs b/testcontainers/tests/sync_runner.rs index 60aaa22e..e712baae 100644 --- a/testcontainers/tests/sync_runner.rs +++ b/testcontainers/tests/sync_runner.rs @@ -303,6 +303,40 @@ fn sync_copy_files_to_container() -> anyhow::Result<()> { Ok(()) } +#[test] +fn sync_copy_file_from_container_to_path() -> anyhow::Result<()> { + let container = GenericImage::new("alpine", "latest") + .with_wait_for(WaitFor::seconds(1)) + .with_cmd(["sh", "-c", "echo 'sync path' > /tmp/result.txt && sleep 30"]) + .start()?; + + let destination_dir = tempfile::tempdir()?; + let destination = destination_dir.path().join("result.txt"); + + container.copy_file_from("/tmp/result.txt", destination.as_path())?; + + let copied = std::fs::read_to_string(&destination)?; + assert_eq!(copied, "sync path\n"); + + container.stop()?; + Ok(()) +} + +#[test] +fn sync_copy_file_from_container_into_mut_vec() -> anyhow::Result<()> { + let container = GenericImage::new("alpine", "latest") + 
.with_wait_for(WaitFor::seconds(1)) + .with_cmd(["sh", "-c", "echo 'sync vec' > /tmp/result.txt && sleep 30"]) + .start()?; + + let mut buffer = Vec::new(); + container.copy_file_from("/tmp/result.txt", &mut buffer)?; + assert_eq!(buffer, b"sync vec\n"); + + container.stop()?; + Ok(()) +} + #[test] fn sync_container_is_running() -> anyhow::Result<()> { let _ = pretty_env_logger::try_init(); From 4fe5939dc76159db3b468240250d9409a0f994d3 Mon Sep 17 00:00:00 2001 From: DCjanus Date: Wed, 5 Nov 2025 01:20:34 +0800 Subject: [PATCH 2/4] feat: enhance documentation for file copying and data handling --- docs/features/files.md | 3 ++- testcontainers/src/core/copy.rs | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/features/files.md b/docs/features/files.md index ef6d7b36..d09686ed 100644 --- a/docs/features/files.md +++ b/docs/features/files.md @@ -48,6 +48,7 @@ async fn copy_example() -> anyhow::Result<()> { - `copy_file_from` streams the sole regular-file entry produced by Docker into any destination implementing `CopyFileFromContainer` (for example `&Path`, `PathBuf`, `Vec`, or `&mut Vec`). It verifies that **exactly one** file exists and returns an error (e.g., `CopyFileError::UnexpectedDirectory`) when the path resolves to a directory or an unsupported TAR record. +- Targets like `Vec` and filesystem paths overwrite existing data: vectors are cleared before writing, and files are truncated or recreated if they already exist. - To capture the contents in memory: ```rust let mut bytes = Vec::new(); @@ -84,4 +85,4 @@ Bind mounts share host state directly. Tmpfs mounts create ephemeral in-memory s - **Use mounts** — when containers need to read/write large amounts of data efficiently without re-tarring. Mixing these tools keeps tests hermetic (isolated and reproducible) while letting you inspect outputs locally. 
-Document each choice in code so teammates know whether data is ephemeral (`tmpfs`), seeded once (`with_copy_to`), or captured for later assertions (`copy_file_from`). \ No newline at end of file +Document each choice in code so teammates know whether data is ephemeral (`tmpfs`), seeded once (`with_copy_to`), or captured for later assertions (`copy_file_from`). diff --git a/testcontainers/src/core/copy.rs b/testcontainers/src/core/copy.rs index a0fc7f81..241c563a 100644 --- a/testcontainers/src/core/copy.rs +++ b/testcontainers/src/core/copy.rs @@ -36,14 +36,16 @@ pub enum CopyFromContainerError { /// Abstraction for materializing the bytes read from a source into a concrete destination. /// -/// Implementors typically persist the incoming bytes to disk or buffer them in memory and then -/// return a value that callers can work with (for example, the path that was written or the -/// collected bytes). Implementations must consume the provided reader until EOF or return an error. +/// Implementors typically persist the incoming bytes to disk or buffer them in memory. Some return +/// a value that callers can work with (for example, the collected bytes), while others simply +/// report success with `()`. Implementations must consume the provided reader until EOF or return +/// an error. Destinations are allowed to discard any existing data to make room for the incoming +/// bytes. #[async_trait(?Send)] pub trait CopyFileFromContainer { type Output; - /// Writes all bytes from the reader into `self`, returning a value that represents the completed operation. + /// Writes all bytes from the reader into `self`, returning a value that represents the completed operation (or `()` for sinks that only confirm success). /// /// Implementations may mutate `self` and must propagate I/O errors via [`CopyFromContainerError`]. 
async fn copy_from_reader(self, reader: R) -> Result From 7830e29ed1ceee81922b11707e93c8287cfb8fc9 Mon Sep 17 00:00:00 2001 From: DCjanus Date: Wed, 5 Nov 2025 02:29:48 +0800 Subject: [PATCH 3/4] fix(copy): stream first tar entry and simplify docs --- docs/features/files.md | 3 +- testcontainers/src/core/client.rs | 42 ++++++++----------- .../src/core/containers/async_container.rs | 1 + .../src/core/containers/sync_container.rs | 1 + testcontainers/src/core/copy.rs | 2 - 5 files changed, 21 insertions(+), 28 deletions(-) diff --git a/docs/features/files.md b/docs/features/files.md index d09686ed..93a6e838 100644 --- a/docs/features/files.md +++ b/docs/features/files.md @@ -46,8 +46,7 @@ async fn copy_example() -> anyhow::Result<()> { } ``` -- `copy_file_from` streams the sole regular-file entry produced by Docker into any destination implementing `CopyFileFromContainer` (for example `&Path`, `PathBuf`, `Vec`, or `&mut Vec`). - It verifies that **exactly one** file exists and returns an error (e.g., `CopyFileError::UnexpectedDirectory`) when the path resolves to a directory or an unsupported TAR record. +- `copy_file_from` streams file contents into any destination implementing `CopyFileFromContainer` (for example `&Path` or `&mut Vec<u8>`). When the requested path is not a regular file, you’ll receive a `CopyFromContainerError`. - Targets like `Vec` and filesystem paths overwrite existing data: vectors are cleared before writing, and files are truncated or recreated if they already exist.
- To capture the contents in memory: ```rust diff --git a/testcontainers/src/core/client.rs b/testcontainers/src/core/client.rs index 7f4d2fe3..caf2ec5f 100644 --- a/testcontainers/src/core/client.rs +++ b/testcontainers/src/core/client.rs @@ -445,34 +445,28 @@ impl Client { let mut archive = AsyncTarArchive::new(reader); let entries = archive.entries().map_err(CopyFromContainerError::Io)?; - let files = - entries - .map_err(CopyFromContainerError::Io) - .try_filter_map(move |entry| async move { - match entry.header().entry_type() { - EntryType::GNULongName - | EntryType::GNULongLink - | EntryType::XGlobalHeader - | EntryType::XHeader - | EntryType::GNUSparse => Ok(None), // skip metadata entries - EntryType::Directory => Err(CopyFromContainerError::IsDirectory), - EntryType::Regular | EntryType::Continuous => return Ok(Some(entry)), - et @ _ => Err(CopyFromContainerError::UnsupportedEntry(et)), - } - }); - - pin_mut!(files); + pin_mut!(entries); - let first_file = files + while let Some(entry) = entries .try_next() - .await? - .ok_or(CopyFromContainerError::EmptyArchive)?; - - if files.try_next().await?.is_some() { - return Err(CopyFromContainerError::MultipleFilesInArchive); + .await + .map_err(CopyFromContainerError::Io)? 
+ { + match entry.header().entry_type() { + EntryType::GNULongName + | EntryType::GNULongLink + | EntryType::XGlobalHeader + | EntryType::XHeader + | EntryType::GNUSparse => continue, // skip metadata entries + EntryType::Directory => return Err(CopyFromContainerError::IsDirectory), + EntryType::Regular | EntryType::Continuous => { + return target.copy_from_reader(entry).await + } + et @ _ => return Err(CopyFromContainerError::UnsupportedEntry(et)), + } } - target.copy_from_reader(first_file).await + Err(CopyFromContainerError::EmptyArchive) } pub(crate) async fn container_is_running( diff --git a/testcontainers/src/core/containers/async_container.rs b/testcontainers/src/core/containers/async_container.rs index a6d7a5a5..4cb8d684 100644 --- a/testcontainers/src/core/containers/async_container.rs +++ b/testcontainers/src/core/containers/async_container.rs @@ -186,6 +186,7 @@ where /// /// # Behavior /// - Regular files are streamed directly into the target (e.g. `PathBuf`, `Vec`). + /// - Additional archive entries (metadata or other files) are skipped after the first regular file. /// - If `container_path` resolves to a directory, an error is returned and no data is written. /// - Symlink handling follows Docker's `GET /containers/{id}/archive` endpoint behavior without extra processing. pub async fn copy_file_from( diff --git a/testcontainers/src/core/containers/sync_container.rs b/testcontainers/src/core/containers/sync_container.rs index 121301c0..c6b228ec 100644 --- a/testcontainers/src/core/containers/sync_container.rs +++ b/testcontainers/src/core/containers/sync_container.rs @@ -136,6 +136,7 @@ where /// /// # Behavior /// - Regular files are streamed directly into the target (e.g. `PathBuf`, `Vec`). + /// - Additional archive entries (metadata or other files) are skipped after the first regular file. /// - If `container_path` resolves to a directory, an error is returned and no data is written. 
/// - Symlink handling follows Docker's `GET /containers/{id}/archive` endpoint behavior without extra processing. pub fn copy_file_from( diff --git a/testcontainers/src/core/copy.rs b/testcontainers/src/core/copy.rs index 241c563a..95c335f4 100644 --- a/testcontainers/src/core/copy.rs +++ b/testcontainers/src/core/copy.rs @@ -26,8 +26,6 @@ pub enum CopyFromContainerError { Io(#[from] std::io::Error), #[error("archive did not contain any regular files")] EmptyArchive, - #[error("archive contained multiple files, but only one was expected")] - MultipleFilesInArchive, #[error("requested container path is a directory")] IsDirectory, #[error("archive entry type '{0:?}' is not supported for requested target")] From 0d6b1fcf1d9e91771269f7a3fc17e85a264463dd Mon Sep 17 00:00:00 2001 From: DCjanus Date: Wed, 5 Nov 2025 10:58:00 +0800 Subject: [PATCH 4/4] test(copy-file-from): cover directory extraction error --- testcontainers/tests/async_runner.rs | 28 +++++++++++++++++++++++++++- testcontainers/tests/sync_runner.rs | 27 ++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/testcontainers/tests/async_runner.rs b/testcontainers/tests/async_runner.rs index 34795777..25da644d 100644 --- a/testcontainers/tests/async_runner.rs +++ b/testcontainers/tests/async_runner.rs @@ -6,9 +6,11 @@ use bollard::{ }; use testcontainers::{ core::{ + client::ClientError, + error::TestcontainersError, logs::{consumer::logging_consumer::LoggingConsumer, LogFrame}, wait::{ExitWaitStrategy, LogWaitStrategy}, - BuildImageOptions, CmdWaitFor, ExecCommand, WaitFor, + BuildImageOptions, CmdWaitFor, CopyFromContainerError, ExecCommand, WaitFor, }, runners::{AsyncBuilder, AsyncRunner}, GenericBuildableImage, GenericImage, Image, ImageExt, @@ -332,6 +334,30 @@ async fn async_copy_file_from_container_into_mut_vec() -> anyhow::Result<()> { Ok(()) } +#[tokio::test] +async fn async_copy_file_from_container_directory_errors() -> anyhow::Result<()> { + let container = 
GenericImage::new("alpine", "latest") + .with_wait_for(WaitFor::seconds(1)) + .with_cmd(["sh", "-c", "mkdir -p /tmp/result_dir && sleep 10"]) + .start() + .await?; + + let err = container + .copy_file_from("/tmp/result_dir", Vec::::new()) + .await + .expect_err("expected directory copy to fail"); + + match err { + TestcontainersError::Client(ClientError::CopyFromContainerError( + CopyFromContainerError::IsDirectory, + )) => {} + other => panic!("unexpected error: {other:?}"), + } + + container.stop().await?; + Ok(()) +} + #[tokio::test] async fn async_container_is_running() -> anyhow::Result<()> { let _ = pretty_env_logger::try_init(); diff --git a/testcontainers/tests/sync_runner.rs b/testcontainers/tests/sync_runner.rs index e712baae..b6def611 100644 --- a/testcontainers/tests/sync_runner.rs +++ b/testcontainers/tests/sync_runner.rs @@ -4,9 +4,12 @@ use std::time::Instant; use testcontainers::{ core::{ + client::ClientError, + error::TestcontainersError, logs::{consumer::logging_consumer::LoggingConsumer, LogFrame}, wait::LogWaitStrategy, - BuildImageOptions, CmdWaitFor, ExecCommand, Host, IntoContainerPort, WaitFor, + BuildImageOptions, CmdWaitFor, CopyFromContainerError, ExecCommand, Host, + IntoContainerPort, WaitFor, }, runners::{SyncBuilder, SyncRunner}, GenericBuildableImage, *, @@ -337,6 +340,28 @@ fn sync_copy_file_from_container_into_mut_vec() -> anyhow::Result<()> { Ok(()) } +#[test] +fn sync_copy_file_from_container_directory_errors() -> anyhow::Result<()> { + let container = GenericImage::new("alpine", "latest") + .with_wait_for(WaitFor::seconds(1)) + .with_cmd(["sh", "-c", "mkdir -p /tmp/result_dir && sleep 30"]) + .start()?; + + let err = container + .copy_file_from("/tmp/result_dir", Vec::::new()) + .expect_err("expected directory copy to fail"); + + match err { + TestcontainersError::Client(ClientError::CopyFromContainerError( + CopyFromContainerError::IsDirectory, + )) => {} + other => panic!("unexpected error: {other:?}"), + } + + 
container.stop()?; + Ok(()) +} + #[test] fn sync_container_is_running() -> anyhow::Result<()> { let _ = pretty_env_logger::try_init();