From 2cec2566f63fd6a843ce3e7810902895700ebf4b Mon Sep 17 00:00:00 2001 From: Wolf Vollprecht Date: Wed, 16 Jul 2025 17:42:34 +0200 Subject: [PATCH] add some more checking and warning --- src/metadata.rs | 215 ++++++++++++++++++++++++- src/packaging.rs | 155 ++++++++++++++++-- src/packaging/metadata.rs | 7 +- src/post_process/mod.rs | 1 + src/post_process/path_checks.rs | 277 ++++++++++++++++++++++++++++++++ src/recipe/custom_yaml.rs | 30 ++++ src/recipe/parser.rs | 3 +- src/recipe/parser/build.rs | 5 + 8 files changed, 680 insertions(+), 13 deletions(-) create mode 100644 src/post_process/path_checks.rs diff --git a/src/metadata.rs b/src/metadata.rs index bef30275b..50b3588ea 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -1,7 +1,7 @@ //! All the metadata that makes up a recipe file use std::{ borrow::Cow, - collections::BTreeMap, + collections::{BTreeMap, HashMap}, fmt::{self, Display, Formatter}, io::Write, iter, @@ -11,6 +11,7 @@ use std::{ }; use chrono::{DateTime, Utc}; +use console; use dunce::canonicalize; use fs_err as fs; use indicatif::HumanBytes; @@ -435,6 +436,103 @@ pub struct BuildSummary { pub paths: Option, /// Whether the build was successful or not pub failed: bool, + /// Package size statistics + pub package_stats: Option, +} + +/// Statistics about the packaged files +#[derive(Debug, Clone)] +pub struct PackageStats { + /// Compressed size of the package + pub compressed_size: u64, + /// Uncompressed size of all files + pub uncompressed_size: u64, + /// Number of files in the package + pub file_count: usize, + /// Number of directories in the package + pub directory_count: usize, + /// Number of compiled files (based on extensions) + pub compiled_file_count: usize, + /// Size breakdown by file extension + pub size_by_extension: Vec<(String, u64, f64)>, // (extension, size, percentage) + /// List of largest files (up to 10) + pub largest_files: Vec<(String, u64)>, // (path, size) +} + +impl PackageStats { + /// Calculate package statistics from PathsJson and compressed size + pub fn from_paths_json(paths_json: &PathsJson, compressed_size: u64) -> Self { + let mut uncompressed_size = 0u64; + let mut file_count = 0; + let mut directory_count = 0; + let mut compiled_file_count = 0; + let mut size_by_extension: HashMap = HashMap::new(); + let mut all_files: Vec<(String, u64)> = Vec::new(); + + // Compiled file extensions (common binary/compiled formats) + let compiled_extensions = [ + "so", "dylib", "dll", "exe", "o", "obj", "lib", "a", "pyc", "pyo", "whl", + ]; + + for entry in &paths_json.paths { + let size = entry.size_in_bytes.unwrap_or(0); + uncompressed_size += size; + + match entry.path_type { + rattler_conda_types::package::PathType::Directory => { + directory_count += 1; + } + _ => { + file_count += 1; + let path_str = entry.relative_path.to_string_lossy(); + all_files.push((path_str.to_string(), size)); + + // Get file extension + let extension = if let Some(extension) = entry.relative_path.extension() { + extension.to_string_lossy().to_lowercase() + } else { + "no-extension".to_string() + }; + + // Check if it's a compiled file + if compiled_extensions.contains(&extension.as_str()) { + compiled_file_count += 1; + } + + // Add to size by extension + *size_by_extension.entry(extension).or_insert(0) += size; + } + } + } + + // Sort extensions by size (largest first) + let mut size_by_extension: Vec<(String, u64, f64)> = size_by_extension + .into_iter() + .map(|(ext, size)| { + let percentage = if uncompressed_size > 0 { + (size as f64 / uncompressed_size as f64) * 100.0 + } else { + 0.0 + }; + (ext, size, percentage) + }) + .collect(); + size_by_extension.sort_by(|a, b| b.1.cmp(&a.1)); + + // Get top 10 largest files + all_files.sort_by(|a, b| b.1.cmp(&a.1)); + let largest_files = all_files.into_iter().take(10).collect(); + + Self { + compressed_size, + uncompressed_size, + file_count, + directory_count, + compiled_file_count, + size_by_extension, + largest_files, + } + } } /// A output. This is the central element that is passed to the `run_build` @@ -525,6 +623,12 @@ impl Output { let mut summary = self.build_summary.lock().unwrap(); summary.artifact = Some(artifact.to_path_buf()); summary.paths = Some(paths.clone()); + + // Calculate package statistics + if let Ok(metadata) = fs::metadata(artifact) { + let compressed_size = metadata.len(); + summary.package_stats = Some(PackageStats::from_paths_json(paths, compressed_size)); + } } /// Record the end of the build @@ -592,6 +696,12 @@ impl Output { } else { tracing::info!("No artifact was created"); } + + // Print enhanced package statistics + if let Some(stats) = &summary.package_stats { + self.print_package_stats(stats); + } + tracing::info!("{}", self); if !summary.warnings.is_empty() { @@ -685,6 +795,103 @@ impl Output { } Ok(()) } + + /// Print enhanced package statistics + fn print_package_stats(&self, stats: &PackageStats) { + let compression_ratio = if stats.uncompressed_size > 0 { + if stats.compressed_size <= stats.uncompressed_size { + ((stats.uncompressed_size - stats.compressed_size) as f64 + / stats.uncompressed_size as f64) + * 100.0 + } else { + // Handle case where compressed size is larger than uncompressed (overhead) + -((stats.compressed_size - stats.uncompressed_size) as f64 + / stats.uncompressed_size as f64) + * 100.0 + } + } else { + 0.0 + }; + + tracing::info!(""); + tracing::info!("{}", console::style("┌─ Package Statistics").bold().blue()); + tracing::info!("│"); + tracing::info!("├─ {}", console::style("File Size").bold().cyan()); + tracing::info!( + "│ ├─ compressed size: {}", + console::style(HumanBytes(stats.compressed_size)).green() + ); + tracing::info!( + "│ ├─ uncompressed size: {}", + console::style(HumanBytes(stats.uncompressed_size)).green() + ); + tracing::info!( + "│ └─ compression space saving: {}", + if compression_ratio >= 0.0 { + console::style(format!("{:.1}%", compression_ratio)).green() + } else { + console::style(format!("{:.1}%", compression_ratio)).red() + } + ); + tracing::info!("│"); + tracing::info!("├─ {}", console::style("Contents").bold().cyan()); + tracing::info!( + "│ ├─ directories: {}", + console::style(stats.directory_count).yellow() + ); + tracing::info!( + "│ └─ files: {} ({} compiled)", + console::style(stats.file_count).yellow(), + console::style(stats.compiled_file_count).magenta() + ); + + if !stats.size_by_extension.is_empty() { + tracing::info!("│"); + tracing::info!("├─ {}", console::style("Size by Extension").bold().cyan()); + let total_extensions = stats.size_by_extension.len(); + for (i, (extension, size, percentage)) in stats.size_by_extension.iter().enumerate() { + let display_ext = if extension == "no-extension" { + "no-extension".to_string() + } else { + format!(".{}", extension) + }; + let connector = if i == total_extensions - 1 { + "└─" + } else { + "├─" + }; + tracing::info!( + "│ {} {} - {} ({})", + connector, + console::style(&display_ext).white(), + console::style(HumanBytes(*size)).green(), + console::style(format!("{:.1}%", percentage)).dim() + ); + } + } + + if !stats.largest_files.is_empty() { + tracing::info!("│"); + tracing::info!("├─ {}", console::style("Largest Files").bold().cyan()); + let total_files = stats.largest_files.len(); + for (i, (path, size)) in stats.largest_files.iter().enumerate() { + let connector = if i == total_files - 1 { + "└─" + } else { + "├─" + }; + tracing::info!( + "│ {} {} {}", + connector, + console::style(format!("({})", HumanBytes(*size))).green(), + console::style(path).white() + ); + } + } + + tracing::info!("└─"); + tracing::info!(""); + } } impl Output { @@ -755,6 +962,12 @@ impl Display for Output { } } +impl crate::post_process::path_checks::WarningRecorder for Output { + fn record_warning(&self, warning: &str) { + self.record_warning(warning); + } +} + /// Builds the channel list and reindexes the output channel. pub async fn build_reindexed_channels( build_configuration: &BuildConfiguration, diff --git a/src/packaging.rs b/src/packaging.rs index 8682cecfa..fd75b2c84 100644 --- a/src/packaging.rs +++ b/src/packaging.rs @@ -6,8 +6,11 @@ use std::{ path::{Component, Path, PathBuf}, }; +use console; + use fs_err as fs; use fs_err::File; +use indicatif::HumanBytes; use metadata::clean_url; use rattler_conda_types::{ ChannelUrl, Platform, @@ -380,6 +383,146 @@ where /// dependencies finalized before calling this function. /// /// The `local_channel_dir` is the path to the local channel / output directory. +/// Print enhanced file listing with sizes, symlink targets, and warnings +fn print_enhanced_file_listing( + files: &[&Path], + tmp: &TempFiles, + _output: &Output, +) -> Result<(), PackagingError> { + let normalize_path = |p: &Path| -> String { p.display().to_string().replace('\\', "/") }; + + // Get all path entries for path checks + let path_entries: Vec = files + .iter() + .map(|f| { + let full_path = tmp.temp_dir.path().join(f); + let metadata = fs::metadata(&full_path).unwrap_or_else(|_| { + // Create a dummy metadata for directories or if file doesn't exist + fs::metadata(".").unwrap() + }); + + let path_type = if full_path.is_symlink() { + rattler_conda_types::package::PathType::SoftLink + } else if full_path.is_dir() { + rattler_conda_types::package::PathType::Directory + } else { + rattler_conda_types::package::PathType::HardLink + }; + + rattler_conda_types::package::PathsEntry { + relative_path: f.to_path_buf(), + path_type, + sha256: None, + prefix_placeholder: None, + no_link: false, + size_in_bytes: if path_type == rattler_conda_types::package::PathType::Directory { + None + } else { + Some(metadata.len()) + }, + } + }) + .collect(); + + // Get warnings for each path + let mut path_warnings: std::collections::HashMap> = + std::collections::HashMap::new(); + + // Individual path checks + for entry in &path_entries { + let mut warnings = Vec::new(); + let path = &entry.relative_path; + + // Check for non-ASCII characters + if let Some(path_str) = path.to_str() { + if !path_str.is_ascii() { + warnings.push("Contains non-ASCII characters".to_string()); + } + if path_str.contains(' ') { + warnings.push("Contains spaces".to_string()); + } + if path_str.len() > 255 { + warnings.push(format!("Path too long ({} > 255)", path_str.len())); + } + } + + if !warnings.is_empty() { + path_warnings.insert(path.clone(), warnings); + } + } + + // Print each file with enhanced information + for file in files { + let full_path = tmp.temp_dir.path().join(file); + let is_info = file.components().next() == Some(Component::Normal("info".as_ref())); + let normalized_path = normalize_path(file); + + // Get file size + let size_info = if let Ok(metadata) = fs::metadata(&full_path) { + if full_path.is_dir() { + " (dir)".to_string() + } else { + format!(" ({})", HumanBytes(metadata.len())) + } + } else { + "".to_string() + }; + + // Check if it's a symlink and get target + let symlink_info = if full_path.is_symlink() { + match fs::read_link(&full_path) { + Ok(target) => { + let target_str = target.display().to_string(); + format!(" -> {}", console::style(target_str).cyan()) + } + Err(_) => " -> ".to_string(), + } + } else { + "".to_string() + }; + + // Format the main file entry + let file_entry = if is_info { + format!( + " - {}{}{}", + console::style(&normalized_path).dim(), + console::style(&size_info).dim(), + symlink_info + ) + } else if full_path.is_symlink() { + format!( + " - {}{}{}", + console::style(&normalized_path).magenta(), + console::style(&size_info).dim(), + symlink_info + ) + } else { + format!( + " - {}{}{}", + normalized_path, + console::style(&size_info).dim(), + symlink_info + ) + }; + + tracing::info!("{}", file_entry); + + // Print warnings for this file + if let Some(warnings) = path_warnings.get(&file.to_path_buf()) { + for warning in warnings { + tracing::warn!(" ╰─ ⚠ {}", console::style(warning).yellow()); + } + } + } + + Ok(()) +} + +/// This function creates a conda package from the given output. It will +/// create a conda package from that. Note that the output needs to have its +/// dependencies finalized before calling this function. +/// +/// The `local_channel_dir` is the path to the local channel / output directory. pub fn package_conda( output: &Output, tool_configuration: &tool_configuration::Configuration, @@ -435,7 +578,7 @@ pub fn package_conda( tmp.add_files(vec![info_folder.join("link.json")]); } - // print sorted files + // print sorted files with enhanced information tracing::info!("\nFiles in package:\n"); let mut files = tmp .files @@ -466,15 +609,7 @@ pub fn package_conda( output.record_warning(&warn_str); } - let normalize_path = |p: &Path| -> String { p.display().to_string().replace('\\', "/") }; - - files.iter().for_each(|f| { - if f.components().next() == Some(Component::Normal("info".as_ref())) { - tracing::info!(" - {}", console::style(normalize_path(f)).dim()) - } else { - tracing::info!(" - {}", normalize_path(f)) - } - }); + print_enhanced_file_listing(&files, &tmp, output)?; let output_folder = local_channel_dir.join(output.build_configuration.target_platform.to_string()); diff --git a/src/packaging/metadata.rs b/src/packaging/metadata.rs index 6fed7b2f5..bcaaf507b 100644 --- a/src/packaging/metadata.rs +++ b/src/packaging/metadata.rs @@ -511,8 +511,13 @@ impl Output { fs::create_dir_all(&info_folder)?; let paths_json_path = root_dir.join(PathsJson::package_path()); + let paths_json_data = self.paths_json(temp_files)?; + + // Perform path checks and emit warnings + crate::post_process::path_checks::perform_path_checks(self, &paths_json_data.paths); + let paths_json = File::create(&paths_json_path)?; - serde_json::to_writer_pretty(paths_json, &self.paths_json(temp_files)?)?; + serde_json::to_writer_pretty(paths_json, &paths_json_data)?; new_files.insert(paths_json_path); let index_json_path = root_dir.join(IndexJson::package_path()); diff --git a/src/post_process/mod.rs b/src/post_process/mod.rs index f3d623015..7a8bcc196 100644 --- a/src/post_process/mod.rs +++ b/src/post_process/mod.rs @@ -1,6 +1,7 @@ pub mod checks; pub mod menuinst; pub mod package_nature; +pub mod path_checks; pub mod python; pub mod regex_replacements; pub mod relink; diff --git a/src/post_process/path_checks.rs b/src/post_process/path_checks.rs new file mode 100644 index 000000000..3ad3d0e71 --- /dev/null +++ b/src/post_process/path_checks.rs @@ -0,0 +1,277 @@ +use std::collections::HashMap; +use std::path::Path; + +use rattler_conda_types::package::PathsEntry; + +// Trait for types that can record warnings +pub trait WarningRecorder { + fn record_warning(&self, warning: &str); +} + +const FILE_EXTENSION_GROUPS: &[&[&str]] = &[ + &[".cc", ".CC", ".cpp", ".CPP"], + &[".htm", ".HTM", ".html", ".HTML"], + &[".jpg", ".JPG", ".jpeg", ".JPEG"], + &[".jsonl", ".JSONL", ".ndjson", ".NDJSON"], + &[".txt", ".TXT", ".text", ".TEXT"], + &[".yaml", ".YAML", ".yml", ".YML"], +]; + +/// Perform path validation checks on the given paths and emit warnings +pub fn perform_path_checks(output: &dyn WarningRecorder, paths_entries: &[PathsEntry]) { + // Convert PathsEntry to paths for easier processing + let all_paths: Vec<&Path> = paths_entries + .iter() + .map(|entry| entry.relative_path.as_path()) + .collect(); + + // Check for non-ASCII characters + check_non_ascii_characters(&all_paths, output); + + // Check for spaces in paths + check_spaces_in_paths(&all_paths, output); + + // Check path length (default limit: 255 characters) + check_path_length(&all_paths, 255, output); + + // Check for case-insensitive collisions + check_case_collisions(&all_paths, output); + + // Check for mixed file extensions + check_mixed_file_extensions(&all_paths, output); +} + +fn check_non_ascii_characters(paths: &[&Path], output: &dyn WarningRecorder) { + for path in paths { + if let Some(path_str) = path.to_str() { + if !path_str.is_ascii() { + output.record_warning(&format!( + "Path contains non-ASCII characters: '{}'", + path_str + )); + } + } + } +} + +fn check_spaces_in_paths(paths: &[&Path], output: &dyn WarningRecorder) { + for path in paths { + if let Some(path_str) = path.to_str() { + if path_str.contains(' ') { + output.record_warning(&format!("Path contains spaces: '{}'", path_str)); + } + } + } +} + +fn check_path_length(paths: &[&Path], max_length: usize, output: &dyn WarningRecorder) { + for path in paths { + if let Some(path_str) = path.to_str() { + let length = path_str.len(); + if length > max_length { + output.record_warning(&format!( + "Path too long ({} > {}): '{}'", + length, max_length, path_str + )); + } + } + } +} + +fn check_case_collisions(paths: &[&Path], output: &dyn WarningRecorder) { + let mut path_lower_to_original: HashMap> = HashMap::new(); + + for path in paths { + if let Some(path_str) = path.to_str() { + let lower = path_str.to_lowercase(); + path_lower_to_original + .entry(lower) + .or_default() + .push(path_str.to_string()); + } + } + + for (_, originals) in path_lower_to_original { + if originals.len() > 1 { + let files_str = originals.join(", "); + output.record_warning(&format!( + "Found files which differ only by case: {}", + files_str + )); + } + } +} + +fn check_mixed_file_extensions(paths: &[&Path], output: &dyn WarningRecorder) { + let mut extension_counts: HashMap = HashMap::new(); + + // Count occurrences of each extension + for path in paths { + if let Some(ext) = path.extension() { + if let Some(ext_str) = ext.to_str() { + let ext_with_dot = format!(".{}", ext_str); + *extension_counts.entry(ext_with_dot).or_insert(0) += 1; + } + } + } + + // Check each group of related extensions + for group in FILE_EXTENSION_GROUPS { + let mut found_extensions = Vec::new(); + for ext in *group { + if extension_counts.contains_key(*ext) { + found_extensions.push(*ext); + } + } + + if found_extensions.len() >= 2 { + let extensions_str = found_extensions + .iter() + .map(|ext| format!("{} ({})", ext, extension_counts.get(*ext).unwrap_or(&0))) + .collect::>() + .join(", "); + + output.record_warning(&format!( + "Found a mix of file extensions for the same file type: {}", + extensions_str + )); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rattler_conda_types::package::{PathType, PathsEntry}; + use std::path::PathBuf; + use std::sync::{Arc, Mutex}; + + // Mock struct to capture warnings + #[derive(Default)] + struct MockOutput { + warnings: Arc>>, + } + + impl MockOutput { + fn new() -> Self { + Self::default() + } + + fn warnings(&self) -> Vec { + self.warnings.lock().unwrap().clone() + } + } + + impl WarningRecorder for MockOutput { + fn record_warning(&self, warning: &str) { + self.warnings.lock().unwrap().push(warning.to_string()); + } + } + + fn create_test_entry(path: &str, path_type: PathType) -> PathsEntry { + PathsEntry { + relative_path: PathBuf::from(path), + path_type, + sha256: None, + prefix_placeholder: None, + no_link: false, + size_in_bytes: None, + } + } + + #[test] + fn test_non_ascii_characters() { + let entries = vec![ + create_test_entry("normal/file.txt", PathType::HardLink), + create_test_entry("café/file.txt", PathType::HardLink), + create_test_entry("文件/test.py", PathType::HardLink), + ]; + + let output = MockOutput::new(); + perform_path_checks(&output, &entries); + + let warnings = output.warnings(); + assert_eq!(warnings.len(), 2); + assert!(warnings.iter().any(|w| w.contains("café"))); + assert!(warnings.iter().any(|w| w.contains("文件"))); + } + + #[test] + fn test_spaces_in_paths() { + let entries = vec![ + create_test_entry("normal/file.txt", PathType::HardLink), + create_test_entry("has space/file.txt", PathType::HardLink), + create_test_entry("another file.txt", PathType::HardLink), + ]; + + let output = MockOutput::new(); + perform_path_checks(&output, &entries); + + let warnings = output.warnings(); + assert_eq!(warnings.iter().filter(|w| w.contains("spaces")).count(), 2); + } + + #[test] + fn test_path_too_long() { + let long_path = "a".repeat(300); + let entries = vec![ + create_test_entry("normal/file.txt", PathType::HardLink), + create_test_entry(&long_path, PathType::HardLink), + ]; + + let output = MockOutput::new(); + perform_path_checks(&output, &entries); + + let warnings = output.warnings(); + assert_eq!( + warnings.iter().filter(|w| w.contains("too long")).count(), + 1 + ); + assert!(warnings.iter().any(|w| w.contains("300 > 255"))); + } + + #[test] + fn test_case_collisions() { + let entries = vec![ + create_test_entry("file.txt", PathType::HardLink), + create_test_entry("File.txt", PathType::HardLink), + create_test_entry("FILE.TXT", PathType::HardLink), + create_test_entry("other.py", PathType::HardLink), + ]; + + let output = MockOutput::new(); + perform_path_checks(&output, &entries); + + let warnings = output.warnings(); + assert_eq!( + warnings + .iter() + .filter(|w| w.contains("differ only by case")) + .count(), + 1 + ); + } + + #[test] + fn test_mixed_extensions() { + let entries = vec![ + create_test_entry("file.txt", PathType::HardLink), + create_test_entry("file.TXT", PathType::HardLink), + create_test_entry("file.text", PathType::HardLink), + create_test_entry("doc.yaml", PathType::HardLink), + create_test_entry("doc.yml", PathType::HardLink), + ]; + + let output = MockOutput::new(); + perform_path_checks(&output, &entries); + + let warnings = output.warnings(); + assert_eq!( + warnings + .iter() + .filter(|w| w.contains("mix of file extensions")) + .count(), + 2 + ); // txt/TXT/text and yaml/yml groups + } +} diff --git a/src/recipe/custom_yaml.rs b/src/recipe/custom_yaml.rs index 2d50d190c..764c4d696 100644 --- a/src/recipe/custom_yaml.rs +++ b/src/recipe/custom_yaml.rs @@ -1045,6 +1045,36 @@ impl TryConvertNode for RenderedScalarNode { } } +impl TryConvertNode for RenderedNode { + fn try_convert(&self, name: &str) -> Result> { + self.as_scalar() + .ok_or_else(|| { + _partialerror!( + *self.span(), + ErrorKind::ExpectedScalar, + label = format!("expected a scalar value for `{name}`") + ) + }) + .map_err(|e| vec![e]) + .and_then(|s| s.try_convert(name)) + } +} + +impl TryConvertNode for RenderedScalarNode { + fn try_convert(&self, _name: &str) -> Result> { + self.as_str() + .parse() + .map_err(|err| { + _partialerror!( + *self.span(), + ErrorKind::from(err), + label = format!("failed to parse `{}` as unsigned integer", self.as_str()) + ) + }) + .map_err(|e| vec![e]) + } +} + impl TryConvertNode for RenderedScalarNode { fn try_convert(&self, _name: &str) -> Result> { self.as_str() diff --git a/src/recipe/parser.rs b/src/recipe/parser.rs index 693db14e8..fc813c53f 100644 --- a/src/recipe/parser.rs +++ b/src/recipe/parser.rs @@ -20,7 +20,8 @@ use crate::{ }; mod about; -mod build; +/// Build configuration and settings +pub mod build; mod cache; mod glob_vec; mod helper; diff --git a/src/recipe/parser/build.rs b/src/recipe/parser/build.rs index 3b1749435..a80951320 100644 --- a/src/recipe/parser/build.rs +++ b/src/recipe/parser/build.rs @@ -201,8 +201,11 @@ impl TryConvertNode for RenderedScalarNode { /// Post process operations for regex based replacements #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PostProcess { + /// Files to apply the regex replacement to pub files: GlobVec, + /// The regex pattern to match pub regex: SerializableRegex, + /// The replacement string pub replacement: String, } @@ -388,8 +391,10 @@ impl DynamicLinking { #[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "lowercase")] pub enum LinkingCheckBehavior { + /// Ignore linking issues #[default] Ignore, + /// Treat linking issues as errors Error, }