Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 48 additions & 77 deletions src/cmd-diff
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ import shutil
import subprocess
import sys
import tempfile
import time
from multiprocessing import Process

from dataclasses import dataclass
from enum import IntEnum
Expand Down Expand Up @@ -96,6 +94,11 @@ def main():
for differ in active_differs:
differ.function(diff_from, diff_to)

if args.artifact:
diff_artifact(diff_from, diff_to, args.artifact)
if args.artifact_part_table:
diff_artifact_partitions(diff_from, diff_to, args.artifact_part_table)

Comment on lines +97 to +101
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The functions diff_artifact and diff_artifact_partitions are called here but are not defined anywhere in the file. This will cause a NameError at runtime when using the --artifact or --artifact-part-table arguments.

To fix this, you need to implement these functions. They should likely act as dispatchers to the specific diffing functions based on the artifact name. For example:

# A map of artifact names to their diffing functions
ARTIFACT_DIFFERS = {
    'metal': diff_metal,
}

# A map of artifact names to their partition table diffing functions
ARTIFACT_PARTITION_DIFFERS = {
    'metal': diff_metal_partitions,
}

def diff_artifact(diff_from, diff_to, artifact_name):
    """Looks up and calls the diffing function for a given artifact."""
    differ = ARTIFACT_DIFFERS.get(artifact_name)
    if not differ:
        raise ValueError(f"No differ found for artifact: {artifact_name}")
    print(f"Diffing {artifact_name} artifact...")
    differ(diff_from, diff_to)

def diff_artifact_partitions(diff_from, diff_to, artifact_name):
    """Looks up and calls the partition diffing function for a given artifact."""
    differ = ARTIFACT_PARTITION_DIFFERS.get(artifact_name)
    if not differ:
        raise ValueError(f"No partition differ found for artifact: {artifact_name}")
    print(f"Diffing {artifact_name} artifact partition table...")
    differ(diff_from, diff_to)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah. Maybe you didn't mean to include this bit in this PR?

if args.gc:
# some of the dirs in the rootfs are dumb and have "private" bits
runcmd(['find', DIFF_CACHE, '-type', 'd', '-exec', 'chmod', 'u+rwx', '{}', '+'])
Expand All @@ -109,6 +112,8 @@ def parse_args():
parser.add_argument("--to", dest='diff_to', help="Second build ID")
parser.add_argument("--gc", action='store_true', help="Delete cached diff content")
parser.add_argument("--arch", dest='arch', help="Architecture of builds")
parser.add_argument("--artifact", help="Diff artifact image content. e.g. 'metal'")
parser.add_argument("--artifact-part-table", help="Diff artifact disk image partition tables. e.g. 'metal'")

for differ in DIFFERS:
parser.add_argument("--" + differ.name, action='store_true', default=False,
Expand Down Expand Up @@ -318,87 +323,53 @@ def get_metal_path(build_target):
def diff_metal_partitions(diff_from, diff_to):
metal_from = get_metal_path(diff_from)
metal_to = get_metal_path(diff_to)
diff_cmd_outputs(['sgdisk', '-p'], metal_from, metal_to)


def run_guestfs_mount(image_path, mount_target):
"""This function runs in a background thread."""
g = None
try:
g = guestfs.GuestFS(python_return_dict=True)
g.set_backend("direct")
g.add_drive_opts(image_path, readonly=1)
g.launch()

# Mount the disks in the guestfs VM
root = g.findfs_label("root")
g.mount_ro(root, "/")
boot = g.findfs_label("boot")
g.mount_ro(boot, "/boot")
efi = g.findfs_label("EFI-SYSTEM")
g.mount_ro(efi, "/boot/efi")

# This is a blocking call that runs the FUSE server
g.mount_local(mount_target)
g.mount_local_run()

except Exception as e:
print(f"Error in guestfs process for {image_path}: {e}", file=sys.stderr)
finally:
if g:
g.close()
diff_cmd_outputs(['sfdisk', '--dump'], metal_from, metal_to)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO this is probably worth its own commit (i.e. I'm interested in the reasoning and future people may be interested in that reasoning too).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.



def diff_metal(diff_from, diff_to):
metal_from = get_metal_path(diff_from)
metal_to = get_metal_path(diff_to)

mount_dir_from = os.path.join(cache_dir("metal"), diff_from.id)
mount_dir_to = os.path.join(cache_dir("metal"), diff_to.id)

for d in [mount_dir_from, mount_dir_to]:
if os.path.exists(d):
shutil.rmtree(d)
os.makedirs(d)

# As the libreguest mount call is blocking until unmounted, let's
# do that in a separate thread
p_from = Process(target=run_guestfs_mount, args=(metal_from, mount_dir_from))
p_to = Process(target=run_guestfs_mount, args=(metal_to, mount_dir_to))

try:
p_from.start()
p_to.start()
# Wait for the FUSE mounts to be ready. We'll check for a known file.
for i, d in enumerate([mount_dir_from, mount_dir_to]):
p = p_from if i == 0 else p_to
timeout = 60 # seconds
start_time = time.time()
check_file = os.path.join(d, 'ostree')
while not os.path.exists(check_file):
time.sleep(1)
if time.time() - start_time > timeout:
raise Exception(f"Timeout waiting for mount in {d}")
if not p.is_alive():
raise Exception(f"A guestfs process for {os.path.basename(d)} died unexpectedly.")

# Now that the mounts are live, we can diff them
git_diff(mount_dir_from, mount_dir_to)

finally:
# Unmount the FUSE binds, this will make the guestfs mount calls return
runcmd(['fusermount', '-u', mount_dir_from], check=False)
runcmd(['fusermount', '-u', mount_dir_to], check=False)

# Ensure the background processes are terminated
def shutdown_process(process):
process.join(timeout=5)
if process.is_alive():
process.terminate()
process.join()

shutdown_process(p_from)
shutdown_process(p_to)
dir_from = os.path.join(cache_dir("metal"), diff_from.id)
dir_to = os.path.join(cache_dir("metal"), diff_to.id)

for image_path, target_dir in [(metal_from, dir_from), (metal_to, dir_to)]:
if os.path.exists(target_dir):
shutil.rmtree(target_dir)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I run cosa diff --metal && cosa diff --metal will the results of the previous run survive? i.e. it would be nice to detect if the work had already been done and use the previous work here.

os.makedirs(target_dir)

g = None
try:
g = guestfs.GuestFS(python_return_dict=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we're still using GuestFS here, which is completely fine (I'm not sure of another unprivileged way to mount things anyway), but I'm wondering if the previous problems about all files not being detected/used are taken care of with this PR?

i.e. if we're still using GuestFS, what's different now that fixes the problem?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was because I had I/O errors with FUSE so I thought dumping everything would be more reliable.

I can't say if it's more reliable, i haven't done enough testing

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good!

g.set_backend("direct")
g.add_drive_opts(image_path, readonly=1)
g.launch()

# Mount the disks in the guestfs VM
root = g.findfs_label("root")
g.mount_ro(root, "/")
boot = g.findfs_label("boot")
g.mount_ro(boot, "/boot")
efi = g.findfs_label("EFI-SYSTEM")
g.mount_ro(efi, "/boot/efi")

# Exclude backing block devices from the tar
excludes = ["*backingFsBlockDev"]

with tempfile.NamedTemporaryFile(suffix=".tar", delete=True) as tmp_tar:
g.tar_out("/", tmp_tar.name, xattrs=True, selinux=True, excludes= excludes)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
g.tar_out("/", tmp_tar.name, xattrs=True, selinux=True, excludes= excludes)
g.tar_out("/", tmp_tar.name, xattrs=True, selinux=True, excludes=excludes)

# Extract the tarball
runcmd(['tar', '-xf', tmp_tar.name, '-C', target_dir])

except Exception as e:
print(f"Error in guestfs process for {image_path}: {e}", file=sys.stderr)
raise
finally:
if g:
g.close()
Comment on lines +367 to +369
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

If an exception occurs in the try block, and g.close() also raises an exception in this finally block, the original exception will be hidden. This can make debugging more difficult.

To make the error handling more robust, you can wrap the call to g.close() in its own try...except block. This ensures that an error during cleanup doesn't mask the root cause of a failure.

Suggested change
finally:
if g:
g.close()
finally:
if g:
try:
g.close()
except Exception as e:
print(f"Error closing guestfs for {image_path}, ignoring: {e}", file=sys.stderr)


# Now that the contents are downloaded, we can diff them
git_diff(dir_from, dir_to)


def diff_cmd_outputs(cmd, file_from, file_to):
Expand Down
Loading