Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions conf/default/reporting.conf.default
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,10 @@ exclude_dirs = logs, shots
# Good examples are large report formats you don't need in GCS.
exclude_files =

# Mode: zip - will submit all files and folders as a single zip archive. Useful to avoid spamming Pub/Sub notifications on file creation.
# Mode: file - will submit each file individually.
mode = zip

# Can be vm or json
auth_by = vm
# only if auth_by = json. The absolute path to your Google Cloud service account JSON key file.
Expand Down
88 changes: 57 additions & 31 deletions modules/reporting/gcs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os
import logging
import tempfile
import zipfile
from lib.cuckoo.common.constants import CUCKOO_ROOT
from lib.cuckoo.common.abstracts import Report
from lib.cuckoo.common.exceptions import CuckooReportError
Expand Down Expand Up @@ -40,7 +42,6 @@ def run(self, results):
)
return

# Read configuration options from gcs.conf
# Read configuration options from gcs.conf and validate them
bucket_name = self.options.get("bucket_name")
if not bucket_name:
Expand All @@ -66,8 +67,7 @@ def run(self, results):
exclude_dirs_str = self.options.get("exclude_dirs", "")
exclude_files_str = self.options.get("exclude_files", "")

# --- NEW: Parse the exclusion strings into sets for efficient lookups ---
# The `if item.strip()` ensures we don't have empty strings from trailing commas
# Parse the exclusion strings into sets for efficient lookups
exclude_dirs = {item.strip() for item in exclude_dirs_str.split(",") if item.strip()}
exclude_files = {item.strip() for item in exclude_files_str.split(",") if item.strip()}

Expand All @@ -76,6 +76,9 @@ def run(self, results):
if exclude_files:
log.debug("GCS reporting will exclude files: %s", exclude_files)

# Get the upload mode, defaulting to 'file' for backward compatibility
mode = self.options.get("mode", "file")

try:
# --- Authentication ---
log.debug("Authenticating with Google Cloud Storage...")
Expand All @@ -87,39 +90,62 @@ def run(self, results):
"The specified GCS bucket '%s' does not exist or you don't have permission to access it.", bucket_name
)

# --- File Upload ---
# Use the analysis ID as a "folder" in the bucket
analysis_id = results.get("info", {}).get("id")
if not analysis_id:
raise CuckooReportError("Could not get analysis ID from results.")

log.debug("Uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket_name)

# self.analysis_path is the path to the analysis results directory
# e.g., /opt/cape/storage/analyses/123/
source_directory = self.analysis_path

for root, dirs, files in os.walk(source_directory):
# We modify 'dirs' in-place to prevent os.walk from descending into them.
# This is the most efficient way to skip entire directory trees.
dirs[:] = [d for d in dirs if d not in exclude_dirs]

for filename in files:
# --- NEW: File Exclusion Logic ---
if filename in exclude_files:
log.debug("Skipping excluded file: %s", os.path.join(root, filename))
continue # Skip to the next file

local_path = os.path.join(root, filename)
relative_path = os.path.relpath(local_path, source_directory)
blob_name = f"{analysis_id}/{relative_path}"

log.debug("Uploading '%s' to '%s'", local_path, blob_name)

blob = bucket.blob(blob_name)
blob.upload_from_filename(local_path)

log.info("Successfully uploaded files for analysis %d to GCS.", analysis_id)
if mode == "zip":
self.upload_zip_archive(bucket, analysis_id, source_directory, exclude_dirs, exclude_files)
elif mode == "file":
self.upload_files_individually(bucket, analysis_id, source_directory, exclude_dirs, exclude_files)
else:
raise CuckooReportError("Invalid GCS upload mode specified: %s. Must be 'file' or 'zip'.", mode)

except Exception as e:
raise CuckooReportError("Failed to upload report to GCS: %s", str(e))
raise CuckooReportError("Failed to upload report to GCS: %s", e)

def upload_zip_archive(self, bucket, analysis_id, source_directory, exclude_dirs, exclude_files):
    """Compress the analysis directory into one zip and upload it as a single GCS object.

    Args:
        bucket: google.cloud.storage Bucket to upload into.
        analysis_id: numeric analysis ID; the archive is stored as "<id>.zip".
        source_directory: root of the analysis results tree to archive.
        exclude_dirs: set of directory names to prune from the walk entirely.
        exclude_files: set of file names to skip wherever they appear.
    """
    log.debug("Compressing and uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket.name)
    zip_name = "%s.zip" % analysis_id
    blob_name = zip_name

    # delete=False so the path stays valid for upload_from_filename(); the
    # try/finally below guarantees the temp file is removed even when
    # zipping or the upload raises (the original leaked it on failure).
    tmp_zip_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        with zipfile.ZipFile(tmp_zip_file, "w", zipfile.ZIP_DEFLATED) as archive:
            for root, dirs, files in os.walk(source_directory):
                # Prune excluded directories in place so os.walk never descends into them.
                dirs[:] = [d for d in dirs if d not in exclude_dirs]
                for filename in files:
                    if filename in exclude_files:
                        log.debug("Skipping excluded file: %s", os.path.join(root, filename))
                        continue
                    local_path = os.path.join(root, filename)
                    relative_path = os.path.relpath(local_path, source_directory)
                    # Store paths relative to the analysis root inside the archive.
                    archive.write(local_path, relative_path)

        # Close the handle so all zip data is flushed to disk before upload.
        tmp_zip_file.close()

        log.debug("Uploading '%s' to '%s'", tmp_zip_file.name, blob_name)
        blob = bucket.blob(blob_name)
        blob.upload_from_filename(tmp_zip_file.name)
    finally:
        tmp_zip_file.close()  # idempotent; safe if already closed above
        os.unlink(tmp_zip_file.name)

    log.info("Successfully uploaded archive for analysis %d to GCS.", analysis_id)

def upload_files_individually(self, bucket, analysis_id, source_directory, exclude_dirs, exclude_files):
    """Upload each analysis artifact as its own GCS object under "<analysis_id>/...".

    Args:
        bucket: google.cloud.storage Bucket to upload into.
        analysis_id: numeric analysis ID, used as the object-name prefix.
        source_directory: root of the analysis results tree to walk.
        exclude_dirs: set of directory names to prune from the walk entirely.
        exclude_files: set of file names to skip wherever they appear.
    """
    log.debug("Uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket.name)

    for current_dir, subdirs, filenames in os.walk(source_directory):
        # Prune excluded directories in place so os.walk never descends into them.
        subdirs[:] = [name for name in subdirs if name not in exclude_dirs]

        for entry in filenames:
            if entry in exclude_files:
                log.debug("Skipping excluded file: %s", os.path.join(current_dir, entry))
                continue

            full_path = os.path.join(current_dir, entry)
            rel_path = os.path.relpath(full_path, source_directory)
            destination = "%s/%s" % (analysis_id, rel_path)

            log.debug("Uploading '%s' to '%s'", full_path, destination)
            bucket.blob(destination).upload_from_filename(full_path)

    log.info("Successfully uploaded files for analysis %d to GCS.", analysis_id)
Loading