Merged · Changes from 7 commits
4 changes: 3 additions & 1 deletion .cursor/rules/overview.mdc
@@ -22,6 +22,8 @@ The iOS analysis code ONLY has to work with `.xcarchive.zip` files as input.
 
 # Python rules
 
+ALWAYS USE THE `.venv/bin/python` VERSION WHEN RUNNING COMMANDS.
+
 For the Python code make sure to follow all of Sentry's best practices, as well as modern Python best practices. Try to use types as much as possible. If standard repo setup is not present, feel free to configure it and add it to the repo since this is currently a bare setup.
 
 For the CLI, make sure to use the `click` library.
@@ -32,4 +34,4 @@ For the Mach-O handling, use the `lief` library and follow best practices for th
 
 Included is a `test/artifacts` directory which contains sample "clean room" apps that can be used for writing integration tests and validating the output of this tool. Always write new tests to validate behavior and functionality. Prefer to write integration tests using the sample apps instead of writing smaller unit tests or using mocks.
 
-Make sure to write tests using `pytest`.
\ No newline at end of file
+Make sure to write tests using `pytest`.
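The rules above call for integration tests over the sample artifacts, driven through the `click` CLI with `pytest`. A minimal sketch of such a test; the `launchpad.cli.main` entry point and the sample-archive filename are illustrative guesses, not names confirmed by this PR:

```python
# Minimal integration-test sketch following the rules above. The CLI entry
# point (launchpad.cli.main) and the artifact filename are assumptions for
# illustration only.
from pathlib import Path

from click.testing import CliRunner

from launchpad.cli import main  # hypothetical entry point


def test_analyzes_sample_xcarchive() -> None:
    artifact = Path("test/artifacts/sample.xcarchive.zip")  # hypothetical sample app
    result = CliRunner().invoke(main, [str(artifact)])
    assert result.exit_code == 0
```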
33 changes: 31 additions & 2 deletions src/launchpad/artifacts/apple/zipped_xcarchive.py
@@ -61,7 +61,7 @@ def get_plist(self) -> dict[str, Any]:
                 plist_data = plistlib.load(f)
 
             self._plist = plist_data
-            return self._plist
+            return plist_data
         except Exception as e:
             raise RuntimeError(f"Failed to parse Info.plist: {e}")
 
@@ -223,12 +223,41 @@ def get_all_binary_paths(self) -> List[BinaryInfo]:
                             extension_name,
                             extension_binary_path,
                             extension_dsym_path,
-                            is_main_binary=False,
+                            is_main_binary=True,  # App extension main executables are main binaries
                         )
                     )
             except Exception as e:
                 logger.warning(f"Failed to read extension Info.plist at {extension_path}: {e}")
 
+        # Find Watch app binaries
+        for watch_path in app_bundle_path.rglob("Watch/*.app"):
+            if watch_path.is_dir():
+                watch_plist_path = watch_path / "Info.plist"
+                if watch_plist_path.exists():
+                    try:
+                        import plistlib
+
+                        with open(watch_plist_path, "rb") as f:
+                            watch_plist = plistlib.load(f)
+                        watch_executable = watch_plist.get("CFBundleExecutable")
+                        if watch_executable:
+                            watch_binary_path = watch_path / watch_executable
+                            watch_name = f"Watch/{watch_path.stem}/{watch_executable}"
+
+                            watch_uuid = self._extract_binary_uuid(watch_binary_path)
+                            watch_dsym_path = dsym_files.get(watch_uuid) if watch_uuid else None
+
+                            binaries.append(
+                                BinaryInfo(
+                                    watch_name,
+                                    watch_binary_path,
+                                    watch_dsym_path,
+                                    is_main_binary=True,  # Watch app main executables are main binaries
+                                )
+                            )
+                    except Exception as e:
+                        logger.warning(f"Failed to read Watch app Info.plist at {watch_path}: {e}")
+
         return binaries
 
     def get_asset_catalog_details(self, relative_path: Path) -> List[AssetCatalogElement]:
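The Watch-app discovery added above repeats the Info.plist pattern used for app extensions: load the plist, read `CFBundleExecutable`, and resolve the binary next to it. The lookup in isolation, standard library only, with an illustrative bundle path:

```python
# Standalone sketch of the CFBundleExecutable lookup performed above; the
# bundle path in the usage example is illustrative.
import plistlib
from pathlib import Path


def bundle_executable(bundle: Path) -> Path | None:
    """Return the bundle's main executable path, if Info.plist declares one."""
    plist_path = bundle / "Info.plist"
    if not plist_path.exists():
        return None
    with open(plist_path, "rb") as f:
        info = plistlib.load(f)
    executable = info.get("CFBundleExecutable")
    return bundle / executable if executable else None


# e.g. bundle_executable(Path("MyApp.app/Watch/MyWatchApp.app"))
```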
6 changes: 4 additions & 2 deletions src/launchpad/size/insights/apple/localized_strings.py
@@ -1,6 +1,7 @@
 from launchpad.size.insights.insight import Insight, InsightsInput
 from launchpad.size.models.apple import LocalizedStringInsightResult
 from launchpad.size.models.common import FileInfo
+from launchpad.size.models.insights import FileSavingsResult
 
 
 class LocalizedStringsInsight(Insight[LocalizedStringInsightResult]):
@@ -25,10 +26,11 @@ def generate(self, input: InsightsInput) -> LocalizedStringInsightResult | None:
                 localized_files.append(file_info)
                 total_size += file_info.size
 
-        # Only return insight if total size exceeds threshold
         if total_size > self.THRESHOLD_BYTES:
+            file_savings = [FileSavingsResult(file_path=file.path, total_savings=file.size) for file in localized_files]
+
             return LocalizedStringInsightResult(
-                files=localized_files,
+                files=file_savings,
                 total_savings=total_size,
             )
 
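This hunk shows the pattern the rest of the PR repeats: insights now report `FileSavingsResult` entries (file path plus recoverable bytes) instead of raw `FileInfo` objects. A self-contained sketch of the mapping; the two dataclasses are minimal stand-ins for the repo models, kept only to make it runnable:

```python
# Sketch of the FileInfo -> FileSavingsResult migration applied across this PR.
from dataclasses import dataclass


@dataclass
class FileInfo:  # stand-in for launchpad.size.models.common.FileInfo
    path: str
    size: int


@dataclass
class FileSavingsResult:  # stand-in for launchpad.size.models.insights model
    file_path: str
    total_savings: int


localized_files = [FileInfo("en.lproj/Localizable.strings", 2048)]
file_savings = [FileSavingsResult(file_path=f.path, total_savings=f.size) for f in localized_files]
assert sum(s.total_savings for s in file_savings) == 2048
```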
43 changes: 27 additions & 16 deletions src/launchpad/size/insights/apple/main_binary_export_metadata.py
@@ -1,31 +1,42 @@
 from launchpad.size.insights.insight import Insight, InsightsInput
 from launchpad.size.models.apple import MachOBinaryAnalysis, MainBinaryExportMetadataResult
+from launchpad.size.models.insights import FileSavingsResult
 
 
 class MainBinaryExportMetadataInsight(Insight[MainBinaryExportMetadataResult]):
-    """Insight for analyzing the exported symbols metadata in the main binary."""
+    """Insight for analyzing the exported symbols metadata in all main binaries."""
 
     MIN_EXPORTS_THRESHOLD = 1024
 
     def generate(self, input: InsightsInput) -> MainBinaryExportMetadataResult | None:
-        """Generate insight for main binary exported symbols analysis."""
+        """Generate insight for all main binary exported symbols analysis."""
 
-        main_binary_analysis = None
+        export_files: list[FileSavingsResult] = []
+
+        # Analyze all main binaries (main app, app extensions, watch apps)
         for analysis in input.binary_analysis:
             if isinstance(analysis, MachOBinaryAnalysis) and analysis.is_main_binary:
-                main_binary_analysis = analysis
-                break
-
-        if not main_binary_analysis or not main_binary_analysis.binary_analysis:
+                if not analysis.binary_analysis:
+                    continue
+
+                # Look for dyld_exports_trie component in this main binary
+                for component in analysis.binary_analysis.components:
+                    if component.name == "dyld_exports_trie":
+                        if component.size >= self.MIN_EXPORTS_THRESHOLD:
+                            export_files.append(
+                                FileSavingsResult(
+                                    file_path=analysis.binary_relative_path,
+                                    total_savings=component.size,
+                                )
+                            )
+                        break
+
+        if not export_files:
             return None
 
-        dyld_exports_trie_component = None
-        for component in main_binary_analysis.binary_analysis.components:
-            if component.name == "dyld_exports_trie":
-                dyld_exports_trie_component = component
-                break
-
-        if not dyld_exports_trie_component or dyld_exports_trie_component.size < self.MIN_EXPORTS_THRESHOLD:
-            return None
+        total_savings = sum(file.total_savings for file in export_files)
 
-        return MainBinaryExportMetadataResult(total_savings=dyld_exports_trie_component.size)
+        return MainBinaryExportMetadataResult(
+            total_savings=total_savings,
+            files=export_files,
+        )
15 changes: 11 additions & 4 deletions src/launchpad/size/insights/apple/small_files.py
@@ -2,6 +2,7 @@
 from launchpad.size.insights.insight import Insight, InsightsInput
 from launchpad.size.models.apple import SmallFilesInsightResult
 from launchpad.size.models.common import FileInfo
+from launchpad.size.models.insights import FileSavingsResult
 
 
 class SmallFilesInsight(Insight[SmallFilesInsightResult]):
@@ -26,13 +27,19 @@ def generate(self, input: InsightsInput) -> SmallFilesInsightResult | None:
         for file_info in input.file_analysis.files:
             if file_info.size < APPLE_FILESYSTEM_BLOCK_SIZE:
                 small_files.append(file_info)
-                # Calculate wasted space due to block size alignment
-                wasted_space = APPLE_FILESYSTEM_BLOCK_SIZE - file_info.size
-                total_savings += wasted_space
+                total_savings += APPLE_FILESYSTEM_BLOCK_SIZE - file_info.size
 
         if len(small_files) > 0:
+            file_savings = [
+                FileSavingsResult(
+                    file_path=file.path,
+                    total_savings=APPLE_FILESYSTEM_BLOCK_SIZE - file.size,
+                )
+                for file in small_files
+            ]
+
             return SmallFilesInsightResult(
-                files=small_files,
+                files=file_savings,
                 file_count=len(small_files),
                 total_savings=total_savings,
             )
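A quick sanity check of the waste computation above, assuming the 4096-byte block size the constant's name suggests (the real value is defined elsewhere in the repo): a file smaller than one filesystem block still occupies a full block, so the remainder is recoverable.

```python
# Worked example of block-alignment waste, assuming APPLE_FILESYSTEM_BLOCK_SIZE
# is 4096 (the APFS block size); the actual constant lives elsewhere in the repo.
APPLE_FILESYSTEM_BLOCK_SIZE = 4096

sizes = [100, 4000, 4096]
wasted = [APPLE_FILESYSTEM_BLOCK_SIZE - s for s in sizes if s < APPLE_FILESYSTEM_BLOCK_SIZE]
assert wasted == [3996, 96]  # the 4096-byte file is exactly block-aligned and wastes nothing
```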
5 changes: 3 additions & 2 deletions src/launchpad/size/insights/apple/unnecessary_files.py
@@ -4,7 +4,7 @@
 
 from launchpad.size.insights.insight import Insight, InsightsInput
 from launchpad.size.models.common import FileInfo
-from launchpad.size.models.insights import UnnecessaryFilesInsightResult
+from launchpad.size.models.insights import FileSavingsResult, UnnecessaryFilesInsightResult
 
 
 class UnnecessaryFilesInsight(Insight[UnnecessaryFilesInsightResult]):
@@ -48,9 +48,10 @@ def generate(self, input: InsightsInput) -> UnnecessaryFilesInsightResult | None
 
         if unnecessary_files:
             unnecessary_files.sort(key=lambda f: f.size, reverse=True)
+            files = [FileSavingsResult(file_path=file.path, total_savings=file.size) for file in unnecessary_files]
 
             return UnnecessaryFilesInsightResult(
-                files=unnecessary_files,
+                files=files,
                 total_savings=total_size,
             )
 
145 changes: 113 additions & 32 deletions src/launchpad/size/insights/common/duplicate_files.py
@@ -1,45 +1,126 @@
+import hashlib
 import os
 
 from collections import defaultdict
+from pathlib import Path
 from typing import Dict, List
 
 from launchpad.size.insights.insight import Insight, InsightsInput
-from launchpad.size.models.common import FileInfo
-from launchpad.size.models.insights import DuplicateFileGroup, DuplicateFilesInsightResult
+from launchpad.size.models.common import FileInfo, TreemapType
+from launchpad.size.models.insights import (
+    DuplicateFileGroup,
+    DuplicateFilesInsightResult,
+)
 
 
 class DuplicateFilesInsight(Insight[DuplicateFilesInsightResult]):
-    def generate(self, input: InsightsInput) -> DuplicateFilesInsightResult:
-        files_by_hash: Dict[str, List[FileInfo]] = defaultdict(list)
-        for file in input.file_analysis.files:
-            if file.hash_md5:
-                files_by_hash[file.hash_md5].append(file)
+    EXTENSION_ALLOWLIST = [".xcprivacy"]
+
+    # Make sure to group all duplicates in a directory that has one of these extensions
+    DIRECTORY_EXTENSIONS = [".bundle"]
 
+    def generate(self, input: InsightsInput) -> DuplicateFilesInsightResult | None:
         groups: List[DuplicateFileGroup] = []
         total_savings = 0
 
-        for file_list in files_by_hash.values():
-            if len(file_list) > 1:
-                # Calculate savings: total size - size of one copy we keep
-                total_file_size = sum(f.size for f in file_list)
-                savings_for_this_group = total_file_size - file_list[0].size
-
-                if savings_for_this_group > 0:  # Only include if there are actual savings
-                    sorted_files = sorted(file_list, key=lambda f: (-f.size, f.path))
-                    filenames = sorted(set(os.path.basename(f.path) for f in sorted_files))
-                    group_filename = filenames[0]
-
-                    group = DuplicateFileGroup(
-                        filename=group_filename,
-                        files=sorted_files,
-                        total_savings=savings_for_this_group,
-                    )
-                    groups.append(group)
-                    total_savings += savings_for_this_group
-
-        groups = sorted(groups, key=lambda g: (-g.total_savings, g.filename))
-
-        return DuplicateFilesInsightResult(
-            groups=groups,
-            total_savings=total_savings,
-        )
+        covered_containers: set[str] = set()
+        for infos in self._duplicate_directories(input.file_analysis.files).values():
+            if len(infos) < 2:
+                continue
+
+            infos.sort(key=lambda f: (-f.size, f.path))
+            group_size = sum(fi.size for fi in infos)
+            savings = group_size - infos[0].size
+            if savings <= 0:
+                continue
+
+            groups.append(
+                DuplicateFileGroup(
+                    filename=os.path.basename(infos[0].path),
+                    files=infos,
+                    total_savings=savings,
+                )
+            )
+            total_savings += savings
+            for info in infos:
+                covered_containers.add(info.path)
+
+        files_by_hash: Dict[str, List[FileInfo]] = defaultdict(list)
+        for f in input.file_analysis.files:
+            if (
+                f.hash_md5
+                and not self._is_allowed_extension(f.path)
+                and not any(f.path.startswith(c + "/") or f.path == c for c in covered_containers)  # ← NEW GUARD
+            ):
+                files_by_hash[f.hash_md5].append(f)
+
+        for dup_files in files_by_hash.values():
+            if len(dup_files) < 2:
+                continue
+
+            dup_files.sort(key=lambda f: (-f.size, f.path))
+            savings = sum(f.size for f in dup_files) - dup_files[0].size
+            if savings <= 0:
+                continue
+
+            container = self._directory_grouping(dup_files[0].path)
+            name = os.path.basename(container) if container else os.path.basename(dup_files[0].path)
+
+            groups.append(
+                DuplicateFileGroup(
+                    filename=name,
+                    files=dup_files,
+                    total_savings=savings,
+                )
+            )
+            total_savings += savings
+
+        groups.sort(key=lambda g: (-g.total_savings, g.filename))
+
+        if len(groups) > 0:
+            return DuplicateFilesInsightResult(groups=groups, total_savings=total_savings)
+
+        return None
+
+    def _is_allowed_extension(self, file_path: str) -> bool:
+        return any(file_path.endswith(ext) for ext in self.EXTENSION_ALLOWLIST)
+
+    def _directory_grouping(self, file_path: str) -> str | None:
+        p = Path(file_path)
+        for i, part in enumerate(p.parts):
+            if any(part.endswith(ext) for ext in self.DIRECTORY_EXTENSIONS):
+                return str(Path(*p.parts[: i + 1]))
+        return None
+
+    def _duplicate_directories(self, files: List[FileInfo]) -> Dict[str, List[FileInfo]]:
+        dir_to_children: Dict[str, List[FileInfo]] = defaultdict(list)
+        for f in files:
+            if f.hash_md5:
+                root = self._directory_grouping(f.path)
+                if root:
+                    dir_to_children[root].append(f)
+
+        dup_dirs: Dict[str, List[FileInfo]] = defaultdict(list)
+        for root, children in dir_to_children.items():
+            if not children:
+                continue
+
+            md5 = hashlib.md5()
+            for h in sorted(c.hash_md5 for c in children if c.hash_md5):
+                md5.update(h.encode())
+            folder_hash = md5.hexdigest()
+
+            dup_dirs[folder_hash].append(
+                FileInfo(
+                    full_path=(
+                        children[0].full_path.parent / root if children[0].full_path is not None else Path(root)
+                    ),
+                    path=root,
+                    size=sum(c.size for c in children),
+                    file_type="directory",
+                    hash_md5=folder_hash,
+                    treemap_type=TreemapType.FILES,
+                    children=children,
+                )
+            )
+        return dup_dirs
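The `_duplicate_directories` helper above hashes a container by feeding the sorted MD5s of its children into a fresh digest, so the folder hash is independent of enumeration order. The idea in isolation:

```python
# Sketch of the order-independent folder hash used by _duplicate_directories:
# sorting the child hashes first means two directories with identical contents
# produce the same digest regardless of file enumeration order.
import hashlib


def folder_hash(child_hashes: list[str]) -> str:
    md5 = hashlib.md5()
    for h in sorted(child_hashes):
        md5.update(h.encode())
    return md5.hexdigest()


assert folder_hash(["a1b2", "c3d4"]) == folder_hash(["c3d4", "a1b2"])
```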
5 changes: 4 additions & 1 deletion src/launchpad/size/insights/common/hermes_debug_info.py
@@ -5,6 +5,7 @@
 from launchpad.size.insights.insight import Insight, InsightsInput
 from launchpad.size.models.common import FileInfo
 from launchpad.size.models.insights import (
+    FileSavingsResult,
     HermesDebugInfoInsightResult,
 )
 
@@ -47,7 +48,9 @@ def generate(self, input: InsightsInput) -> HermesDebugInfoInsightResult | None:
             reverse=True,
         )
 
+        files = [FileSavingsResult(file_path=file.path, total_savings=file.size) for file in files_with_debug_info]
+
         return HermesDebugInfoInsightResult(
-            files=files_with_debug_info,
+            files=files,
             total_savings=total_savings,
         )