4 changes: 3 additions & 1 deletion .cursor/rules/overview.mdc
@@ -22,6 +22,8 @@ The iOS analysis code ONLY has to work with `.xcarchive.zip` files as input.

# Python rules

ALWAYS USE THE `.venv/bin/python` VERSION WHEN RUNNING COMMANDS.

For the Python code make sure to follow all of Sentry's best practices, as well as modern Python best practices. Try to use types as much as possible. If standard repo setup is not present, feel free to configure it and add it to the repo since this is currently a bare setup.

For the CLI, make sure to use the `click` library.
@@ -32,4 +34,4 @@ For the Mach-O handling, use the `lief` library and follow best practices for th

Included is a `test/artifacts` directory which contains sample "clean room" apps that can be used for writing integration tests and validating the output of this tool. Always write new tests to validate behavior and functionality. Prefer to write integration tests using the sample apps instead of writing smaller unit tests or using mocks.

Make sure to write tests using `pytest`.
Make sure to write tests using `pytest`.
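The rules above standardize on `click` for the CLI, `pytest` for tests, and the repo's `.venv/bin/python` interpreter. A minimal sketch of what following those conventions could look like; the command, option, and artifact names below are hypothetical and not taken from this repository:

```python
# Hypothetical sketch of the conventions described in the rules above; names are illustrative only.
import click
from click.testing import CliRunner


@click.command()
@click.argument("archive", type=click.Path(exists=True, dir_okay=False))
@click.option("--output", "-o", type=click.Path(), default="report.json", help="Where to write the report.")
def analyze(archive: str, output: str) -> None:
    """Analyze an .xcarchive.zip and write a size report."""
    click.echo(f"Analyzing {archive} -> {output}")


# Integration-test style the rules prefer: drive the real CLI against a sample app from test/artifacts.
def test_analyze_sample_app(tmp_path) -> None:
    runner = CliRunner()
    result = runner.invoke(analyze, ["test/artifacts/SampleApp.xcarchive.zip", "-o", str(tmp_path / "report.json")])
    assert result.exit_code == 0


# Per the rule above, run everything with the project interpreter, e.g.:
#   .venv/bin/python -m pytest tests/
```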
33 changes: 31 additions & 2 deletions src/launchpad/artifacts/apple/zipped_xcarchive.py
@@ -61,7 +61,7 @@ def get_plist(self) -> dict[str, Any]:
plist_data = plistlib.load(f)

self._plist = plist_data
return self._plist
return plist_data
except Exception as e:
raise RuntimeError(f"Failed to parse Info.plist: {e}")

@@ -223,12 +223,41 @@ def get_all_binary_paths(self) -> List[BinaryInfo]:
extension_name,
extension_binary_path,
extension_dsym_path,
is_main_binary=False,
is_main_binary=True, # App extension main executables are main binaries
)
)
except Exception as e:
logger.warning(f"Failed to read extension Info.plist at {extension_path}: {e}")

# Find Watch app binaries
for watch_path in app_bundle_path.rglob("Watch/*.app"):
if watch_path.is_dir():
watch_plist_path = watch_path / "Info.plist"
if watch_plist_path.exists():
try:
import plistlib

with open(watch_plist_path, "rb") as f:
watch_plist = plistlib.load(f)
watch_executable = watch_plist.get("CFBundleExecutable")
if watch_executable:
watch_binary_path = watch_path / watch_executable
watch_name = f"Watch/{watch_path.stem}/{watch_executable}"

watch_uuid = self._extract_binary_uuid(watch_binary_path)
watch_dsym_path = dsym_files.get(watch_uuid) if watch_uuid else None

binaries.append(
BinaryInfo(
watch_name,
watch_binary_path,
watch_dsym_path,
is_main_binary=True, # Watch app main executables are main binaries
)
)
except Exception as e:
logger.warning(f"Failed to read Watch app Info.plist at {watch_path}: {e}")

return binaries

def get_asset_catalog_details(self, relative_path: Path) -> List[AssetCatalogElement]:
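With this change the archive reports app-extension and Watch-app executables as main binaries alongside the app's own executable. A minimal caller-side sketch; the `ZippedXCArchive` constructor signature and the attribute names on `BinaryInfo` are assumptions here, since only the positional constructor calls are visible in this diff:

```python
# Sketch: list binaries from an archive and split main executables from the rest.
# BinaryInfo attribute names are assumed to match the positional constructor arguments above.
from pathlib import Path

from launchpad.artifacts.apple.zipped_xcarchive import ZippedXCArchive

archive = ZippedXCArchive(Path("MyApp.xcarchive.zip"))  # hypothetical path; constructor signature assumed
binaries = archive.get_all_binary_paths()

# Main binaries now cover the app itself, app extensions, and Watch apps.
main_binaries = [b for b in binaries if b.is_main_binary]
other_binaries = [b for b in binaries if not b.is_main_binary]

for binary in main_binaries:
    status = "found" if binary.dsym_path is not None else "missing"
    print(f"{binary.name}: dSYM {status}")
```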
16 changes: 12 additions & 4 deletions src/launchpad/size/analyzers/android.py
@@ -170,7 +170,8 @@ def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis:
size=file_size,
file_type=file_type,
treemap_type=treemap_type,
hash_md5=file_hash,
hash=file_hash,
is_dir=False,
)
path_to_file_info["Dex"] = merged_dex_info
logger.debug("Created merged DEX representation: %s", relative_path)
@@ -190,7 +191,8 @@ def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis:
size=merged_size,
file_type=file_type,
treemap_type=treemap_type,
hash_md5="",
hash="",
is_dir=False,
)
path_to_file_info["Dex"] = merged_dex_info
continue
@@ -216,7 +218,8 @@ def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis:
size=merged_size,
file_type=file_type,
treemap_type=treemap_type,
hash_md5="",
hash="",
is_dir=False,
)
path_to_file_info[relative_path] = merged_file_info
else:
@@ -228,7 +231,8 @@ def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis:
size=file_size,
file_type=file_type,
treemap_type=treemap_type,
hash_md5=file_hash,
hash=file_hash,
is_dir=False,
)
path_to_file_info[relative_path] = file_info

@@ -242,8 +246,12 @@ def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis:
files_by_type[file_info.file_type] = []
files_by_type[file_info.file_type].append(file_info)

# Separate directories from files (though APKs typically don't have directory entries)
directories = [f for f in file_infos if f.is_dir]

return FileAnalysis(
files=file_infos,
directories=directories,
)

def _get_class_definitions(self, apks: list[APK]) -> list[ClassDefinition]:
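The Android analyzer now fills the renamed `hash` field, passes `is_dir` explicitly, and hands `FileAnalysis` its directory entries separately. A self-contained sketch of that shape, using lightweight stand-in dataclasses rather than the repo's actual models (which may carry additional fields such as `path` or `children` not shown in this hunk):

```python
# Stand-in dataclasses mirroring the model changes in this hunk; not the repo's actual classes.
from dataclasses import dataclass, field
from typing import List


@dataclass
class FileInfo:
    path: str
    size: int
    file_type: str
    treemap_type: str
    hash: str              # renamed from hash_md5 in this PR
    is_dir: bool = False   # now set explicitly by the analyzer


@dataclass
class FileAnalysis:
    files: List[FileInfo]
    directories: List[FileInfo] = field(default_factory=list)


entry = FileInfo(path="assets/flags.bin", size=12_288, file_type="bin",
                 treemap_type="files", hash="d41d8cd98f00b204e9800998ecf8427e")

# Directories are passed separately and stay empty for APKs, mirroring the comment above.
analysis = FileAnalysis(files=[entry], directories=[f for f in [entry] if f.is_dir])
```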
132 changes: 5 additions & 127 deletions src/launchpad/size/analyzers/apple.py
@@ -2,8 +2,6 @@

from __future__ import annotations

import subprocess

from pathlib import Path
from typing import Any, Dict, List, Tuple

@@ -16,7 +14,6 @@
from launchpad.parsers.apple.macho_symbol_sizes import MachOSymbolSizes
from launchpad.parsers.apple.objc_symbol_type_aggregator import ObjCSymbolTypeAggregator
from launchpad.parsers.apple.swift_symbol_type_aggregator import SwiftSymbolTypeAggregator
from launchpad.size.constants import APPLE_FILESYSTEM_BLOCK_SIZE
from launchpad.size.hermes.utils import make_hermes_reports
from launchpad.size.insights.apple.image_optimization import ImageOptimizationInsight
from launchpad.size.insights.apple.localized_strings import LocalizedStringsInsight
@@ -33,12 +30,11 @@
from launchpad.size.insights.common.large_images import LargeImageFileInsight
from launchpad.size.insights.common.large_videos import LargeVideoFileInsight
from launchpad.size.insights.insight import InsightsInput
from launchpad.size.models.common import FileAnalysis, FileInfo
from launchpad.size.models.treemap import FILE_TYPE_TO_TREEMAP_TYPE, TreemapType
from launchpad.size.treemap.treemap_builder import TreemapBuilder
from launchpad.size.utils.apple_bundle_size import calculate_bundle_sizes
from launchpad.size.utils.file_analysis import analyze_apple_files
from launchpad.utils.apple.code_signature_validator import CodeSignatureValidator
from launchpad.utils.file_utils import calculate_file_hash, get_file_size, to_nearest_block_size
from launchpad.utils.file_utils import get_file_size
from launchpad.utils.logging import get_logger
from launchpad.utils.performance import trace, trace_ctx

@@ -114,7 +110,7 @@ def analyze(self, artifact: AppleArtifact) -> AppleAnalysisResults:
app_info = self.app_info
logger.info(f"Analyzing app: {app_info.name} v{app_info.version}")

file_analysis = self._analyze_files(artifact)
file_analysis = analyze_apple_files(artifact)
logger.info(f"Found {file_analysis.file_count} files, total size: {file_analysis.total_size} bytes")

app_bundle_path = artifact.get_app_bundle_path()
@@ -249,46 +245,6 @@ def _extract_app_info(self, xcarchive: ZippedXCArchive) -> AppleAppInfo:
code_signature_errors=code_signature_errors,
)

@trace("apple.detect_file_type")
def _detect_file_type(self, file_path: Path) -> str:
"""Detect file type using the file command.

Args:
file_path: Path to the file to analyze

Returns:
File type string from file command output, normalized to common types
"""
try:
result = subprocess.run(["file", str(file_path)], capture_output=True, text=True, check=True)
# Extract just the file type description after the colon
file_type = result.stdout.split(":", 1)[1].strip().lower()
logger.debug(f"Detected file type for {file_path}: {file_type}")

# Normalize common file types
if "mach-o" in file_type:
return "macho"
elif "executable" in file_type:
return "executable"
elif "text" in file_type:
return "text"
elif "directory" in file_type:
return "directory"
elif "symbolic link" in file_type:
return "symlink"
elif "hermes javascript bytecode" in file_type:
return "hermes"
elif "empty" in file_type:
return "empty"

return file_type
except subprocess.CalledProcessError as e:
logger.warning(f"Failed to detect file type for {file_path}: {e}")
return "unknown"
except Exception as e:
logger.warning(f"Unexpected error detecting file type for {file_path}: {e}")
return "unknown"

def _get_profile_type(self, profile_data: dict[str, Any]) -> Tuple[str, str]:
"""Determine the type of provisioning profile and its name.
Args:
@@ -325,86 +281,6 @@ def _get_profile_type(self, profile_data: dict[str, Any]) -> Tuple[str, str]:
# If no devices are provisioned, it's an app store profile
return "appstore", profile_name

@trace("apple.analyze_files")
def _analyze_files(self, xcarchive: ZippedXCArchive) -> FileAnalysis:
"""Analyze all files in the app bundle."""
logger.debug("Analyzing files in app bundle")

files: List[FileInfo] = []
app_bundle_path = xcarchive.get_app_bundle_path()

# Walk through all files in the bundle
for file_path in app_bundle_path.rglob("*"):
if not file_path.is_file():
continue

relative_path = file_path.relative_to(app_bundle_path)
file_size = to_nearest_block_size(get_file_size(file_path), APPLE_FILESYSTEM_BLOCK_SIZE)

# Get file type from extension first
# If no extension or unknown type, use file command
file_type = file_path.suffix.lower().lstrip(".")
if not file_type or file_type == "unknown":
file_type = self._detect_file_type(file_path)

# Calculate hash for duplicate detection
file_hash = calculate_file_hash(file_path, algorithm="md5")

children: List[FileInfo] = []
if file_type == "car":
children = self._analyze_asset_catalog(xcarchive, relative_path)
children_size = sum([child.size for child in children])
children.append(
FileInfo(
full_path=file_path,
path=str(relative_path) + "/Other",
size=file_size - children_size,
file_type="unknown",
hash_md5=file_hash,
treemap_type=TreemapType.ASSETS,
children=[],
)
)

file_info = FileInfo(
full_path=file_path,
path=str(relative_path),
size=file_size,
file_type=file_type or "unknown",
hash_md5=file_hash,
treemap_type=FILE_TYPE_TO_TREEMAP_TYPE.get(file_type, TreemapType.FILES),
children=children,
)

files.append(file_info)

return FileAnalysis(files=files)

@trace("apple.analyze_asset_catalog")
def _analyze_asset_catalog(self, xcarchive: ZippedXCArchive, relative_path: Path) -> List[FileInfo]:
"""Analyze an asset catalog file."""
catalog_details = xcarchive.get_asset_catalog_details(relative_path)
result: List[FileInfo] = []
for element in catalog_details:
if element.full_path and element.full_path.exists() and element.full_path.is_file():
file_hash = calculate_file_hash(element.full_path, algorithm="md5")
else:
# not every element is backed by a file, so use imageId as hash
file_hash = element.image_id

result.append(
FileInfo(
full_path=element.full_path,
path=str(relative_path) + "/" + element.name,
size=element.size,
file_type=Path(element.full_path).suffix.lstrip(".") if element.full_path else "other",
hash_md5=file_hash,
treemap_type=TreemapType.ASSETS,
children=[],
)
)
return result

def _generate_insight_with_tracing(
self, insight_class: type, insights_input: InsightsInput, insight_name: str
) -> Any:
@@ -506,6 +382,8 @@ def _test_strip_symbols_removal(self, parser: MachOParser, binary_path: Path) ->
"""Test actual symbol removal using LIEF to get real size savings, similar to what strip does."""
import tempfile

return 0

try:
with trace_ctx("strip_symbols.get_original_size"):
original_size = binary_path.stat().st_size
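For context on the dropped imports: the removed walker rounded every file up to the APFS allocation block via `to_nearest_block_size(..., APPLE_FILESYSTEM_BLOCK_SIZE)`, a concern that now lives behind the extracted `analyze_apple_files` helper. A small illustration of that rounding; the repo's actual utility is not shown in this PR, so this is a sketch of the idea rather than its implementation:

```python
# Illustration of block-size rounding as used by the removed walker above; not the repo's implementation.
APPLE_FILESYSTEM_BLOCK_SIZE = 4096  # assumed block size, for illustration only


def to_nearest_block_size(size: int, block_size: int = APPLE_FILESYSTEM_BLOCK_SIZE) -> int:
    """Round a byte count up to the next whole filesystem block."""
    if size <= 0:
        return 0
    return ((size + block_size - 1) // block_size) * block_size


assert to_nearest_block_size(1) == 4096      # even a 1-byte file occupies a full block on disk
assert to_nearest_block_size(4096) == 4096
assert to_nearest_block_size(4097) == 8192
```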
6 changes: 4 additions & 2 deletions src/launchpad/size/insights/apple/localized_strings.py
@@ -1,6 +1,7 @@
from launchpad.size.insights.insight import Insight, InsightsInput
from launchpad.size.models.apple import LocalizedStringInsightResult
from launchpad.size.models.common import FileInfo
from launchpad.size.models.insights import FileSavingsResult


class LocalizedStringsInsight(Insight[LocalizedStringInsightResult]):
@@ -25,10 +26,11 @@ def generate(self, input: InsightsInput) -> LocalizedStringInsightResult | None:
localized_files.append(file_info)
total_size += file_info.size

# Only return insight if total size exceeds threshold
if total_size > self.THRESHOLD_BYTES:
file_savings = [FileSavingsResult(file_path=file.path, total_savings=file.size) for file in localized_files]

return LocalizedStringInsightResult(
files=localized_files,
files=file_savings,
total_savings=total_size,
)

43 changes: 27 additions & 16 deletions src/launchpad/size/insights/apple/main_binary_export_metadata.py
@@ -1,31 +1,42 @@
from launchpad.size.insights.insight import Insight, InsightsInput
from launchpad.size.models.apple import MachOBinaryAnalysis, MainBinaryExportMetadataResult
from launchpad.size.models.insights import FileSavingsResult


class MainBinaryExportMetadataInsight(Insight[MainBinaryExportMetadataResult]):
"""Insight for analyzing the exported symbols metadata in the main binary."""
"""Insight for analyzing the exported symbols metadata in all main binaries."""

MIN_EXPORTS_THRESHOLD = 1024

def generate(self, input: InsightsInput) -> MainBinaryExportMetadataResult | None:
"""Generate insight for main binary exported symbols analysis."""
"""Generate insight for all main binary exported symbols analysis."""

main_binary_analysis = None
export_files: list[FileSavingsResult] = []

# Analyze all main binaries (main app, app extensions, watch apps)
for analysis in input.binary_analysis:
if isinstance(analysis, MachOBinaryAnalysis) and analysis.is_main_binary:
main_binary_analysis = analysis
break

if not main_binary_analysis or not main_binary_analysis.binary_analysis:
if not analysis.binary_analysis:
continue

# Look for dyld_exports_trie component in this main binary
for component in analysis.binary_analysis.components:
if component.name == "dyld_exports_trie":
if component.size >= self.MIN_EXPORTS_THRESHOLD:
export_files.append(
FileSavingsResult(
file_path=analysis.binary_relative_path,
total_savings=component.size,
)
)
break

if not export_files:
return None

dyld_exports_trie_component = None
for component in main_binary_analysis.binary_analysis.components:
if component.name == "dyld_exports_trie":
dyld_exports_trie_component = component
break

if not dyld_exports_trie_component or dyld_exports_trie_component.size < self.MIN_EXPORTS_THRESHOLD:
return None
total_savings = sum(file.total_savings for file in export_files)

return MainBinaryExportMetadataResult(total_savings=dyld_exports_trie_component.size)
return MainBinaryExportMetadataResult(
total_savings=total_savings,
files=export_files,
)