From 49ab0add2ab52c1bd0a99700954342a1999e1de0 Mon Sep 17 00:00:00 2001 From: doomedraven Date: Tue, 7 Oct 2025 09:55:00 +0000 Subject: [PATCH 1/5] gcs mode --- conf/default/reporting.conf.default | 4 ++ modules/reporting/gcs.py | 88 +++++++++++++++++++---------- 2 files changed, 61 insertions(+), 31 deletions(-) diff --git a/conf/default/reporting.conf.default b/conf/default/reporting.conf.default index cc56bfa46a8..9dcd41fa34d 100644 --- a/conf/default/reporting.conf.default +++ b/conf/default/reporting.conf.default @@ -231,6 +231,10 @@ exclude_dirs = logs, shots # Good examples are large report formats you don't need in GCS. exclude_files = +# Mode: zip - will submit all files and folders as a single zip archive. Useful to avoid spamming Pub/Sub notifications on file creation. +# Mode: file - will submit files one by one. +mode = zip + # Can be vm or json auth_by = vm # only if auth_by = json. The absolute path to your Google Cloud service account JSON key file. diff --git a/modules/reporting/gcs.py b/modules/reporting/gcs.py index f0ded8d10a8..15e26bb98fd 100644 --- a/modules/reporting/gcs.py +++ b/modules/reporting/gcs.py @@ -1,5 +1,7 @@ import os import logging +import tempfile +import zipfile from lib.cuckoo.common.constants import CUCKOO_ROOT from lib.cuckoo.common.abstracts import Report from lib.cuckoo.common.exceptions import CuckooReportError @@ -40,7 +42,6 @@ def run(self, results): ) return - # Read configuration options from gcs.conf # Read configuration options from gcs.conf and validate them bucket_name = self.options.get("bucket_name") if not bucket_name: @@ -66,8 +67,7 @@ def run(self, results): exclude_dirs_str = self.options.get("exclude_dirs", "") exclude_files_str = self.options.get("exclude_files", "") - # --- NEW: Parse the exclusion strings into sets for efficient lookups --- - # The `if item.strip()` ensures we don't have empty strings from trailing commas + # Parse the exclusion strings into sets for efficient lookups exclude_dirs = {item.strip() for 
item in exclude_dirs_str.split(",") if item.strip()} exclude_files = {item.strip() for item in exclude_files_str.split(",") if item.strip()} @@ -76,6 +76,9 @@ def run(self, results): if exclude_files: log.debug("GCS reporting will exclude files: %s", exclude_files) + # Get the upload mode, defaulting to 'file' for backward compatibility + mode = self.options.get("mode", "file") + try: # --- Authentication --- log.debug("Authenticating with Google Cloud Storage...") @@ -87,39 +90,62 @@ def run(self, results): "The specified GCS bucket '%s' does not exist or you don't have permission to access it.", bucket_name ) - # --- File Upload --- - # Use the analysis ID as a "folder" in the bucket analysis_id = results.get("info", {}).get("id") if not analysis_id: raise CuckooReportError("Could not get analysis ID from results.") - log.debug("Uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket_name) - - # self.analysis_path is the path to the analysis results directory - # e.g., /opt/cape/storage/analyses/123/ source_directory = self.analysis_path - for root, dirs, files in os.walk(source_directory): - # We modify 'dirs' in-place to prevent os.walk from descending into them. - # This is the most efficient way to skip entire directory trees. 
- dirs[:] = [d for d in dirs if d not in exclude_dirs] - - for filename in files: - # --- NEW: File Exclusion Logic --- - if filename in exclude_files: - log.debug("Skipping excluded file: %s", os.path.join(root, filename)) - continue # Skip to the next file - - local_path = os.path.join(root, filename) - relative_path = os.path.relpath(local_path, source_directory) - blob_name = f"{analysis_id}/{relative_path}" - - log.debug("Uploading '%s' to '%s'", local_path, blob_name) - - blob = bucket.blob(blob_name) - blob.upload_from_filename(local_path) - - log.info("Successfully uploaded files for analysis %d to GCS.", analysis_id) + if mode == "zip": + self.upload_zip_archive(bucket, analysis_id, source_directory, exclude_dirs, exclude_files) + elif mode == "file": + self.upload_files_individually(bucket, analysis_id, source_directory, exclude_dirs, exclude_files) + else: + raise CuckooReportError("Invalid GCS upload mode specified: %s. Must be 'file' or 'zip'.", mode) except Exception as e: - raise CuckooReportError("Failed to upload report to GCS: %s", str(e)) + raise CuckooReportError("Failed to upload report to GCS: %s", e) + + def upload_zip_archive(self, bucket, analysis_id, source_directory, exclude_dirs, exclude_files): + """Compresses and uploads the analysis directory as a single zip file.""" + log.debug("Compressing and uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket.name) + zip_name = "%s.zip" % analysis_id + blob_name = zip_name + + with tempfile.NamedTemporaryFile(delete=False) as tmp_zip_file: + with zipfile.ZipFile(tmp_zip_file, "w", zipfile.ZIP_DEFLATED) as archive: + for root, dirs, files in os.walk(source_directory): + dirs[:] = [d for d in dirs if d not in exclude_dirs] + for filename in files: + if filename in exclude_files: + log.debug("Skipping excluded file: %s", os.path.join(root, filename)) + continue + local_path = os.path.join(root, filename) + relative_path = os.path.relpath(local_path, source_directory) + 
archive.write(local_path, relative_path) + + log.debug("Uploading '%s' to '%s'", tmp_zip_file.name, blob_name) + blob = bucket.blob(blob_name) + blob.upload_from_filename(tmp_zip_file.name) + + os.unlink(tmp_zip_file.name) + log.info("Successfully uploaded archive for analysis %d to GCS.", analysis_id) + + def upload_files_individually(self, bucket, analysis_id, source_directory, exclude_dirs, exclude_files): + """Uploads analysis files individually to the GCS bucket.""" + log.debug("Uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket.name) + for root, dirs, files in os.walk(source_directory): + dirs[:] = [d for d in dirs if d not in exclude_dirs] + for filename in files: + if filename in exclude_files: + log.debug("Skipping excluded file: %s", os.path.join(root, filename)) + continue + local_path = os.path.join(root, filename) + relative_path = os.path.relpath(local_path, source_directory) + blob_name = "%s/%s" % (analysis_id, relative_path) + + log.debug("Uploading '%s' to '%s'", local_path, blob_name) + blob = bucket.blob(blob_name) + blob.upload_from_filename(local_path) + + log.info("Successfully uploaded files for analysis %d to GCS.", analysis_id) From 1ddc762a81e85802ed577e8cc26569a4f51361ec Mon Sep 17 00:00:00 2001 From: doomedraven Date: Tue, 7 Oct 2025 09:58:43 +0000 Subject: [PATCH 2/5] Update modules/reporting/gcs.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- modules/reporting/gcs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/reporting/gcs.py b/modules/reporting/gcs.py index 15e26bb98fd..55e6d6354f9 100644 --- a/modules/reporting/gcs.py +++ b/modules/reporting/gcs.py @@ -142,7 +142,7 @@ def upload_files_individually(self, bucket, analysis_id, source_directory, exclu continue local_path = os.path.join(root, filename) relative_path = os.path.relpath(local_path, source_directory) - blob_name = "%s/%s" % (analysis_id, relative_path) + 
blob_name = f"{analysis_id}/{relative_path}" log.debug("Uploading '%s' to '%s'", local_path, blob_name) blob = bucket.blob(blob_name) From 176c4ef6907d1c63e9e78734c130173718e341ca Mon Sep 17 00:00:00 2001 From: doomedraven Date: Tue, 7 Oct 2025 09:58:56 +0000 Subject: [PATCH 3/5] Update modules/reporting/gcs.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- modules/reporting/gcs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/reporting/gcs.py b/modules/reporting/gcs.py index 55e6d6354f9..6269c5ddc63 100644 --- a/modules/reporting/gcs.py +++ b/modules/reporting/gcs.py @@ -104,7 +104,7 @@ def run(self, results): raise CuckooReportError("Invalid GCS upload mode specified: %s. Must be 'file' or 'zip'.", mode) except Exception as e: - raise CuckooReportError("Failed to upload report to GCS: %s", e) + raise CuckooReportError(f"Failed to upload report to GCS: {e}") from e def upload_zip_archive(self, bucket, analysis_id, source_directory, exclude_dirs, exclude_files): """Compresses and uploads the analysis directory as a single zip file.""" From 007d9c722318b8f79f227c23d97766cad449b737 Mon Sep 17 00:00:00 2001 From: doomedraven Date: Tue, 7 Oct 2025 10:07:33 +0000 Subject: [PATCH 4/5] Update modules/reporting/gcs.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- modules/reporting/gcs.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/modules/reporting/gcs.py b/modules/reporting/gcs.py index 6269c5ddc63..b30d022a1a7 100644 --- a/modules/reporting/gcs.py +++ b/modules/reporting/gcs.py @@ -112,7 +112,8 @@ def upload_zip_archive(self, bucket, analysis_id, source_directory, exclude_dirs zip_name = "%s.zip" % analysis_id blob_name = zip_name - with tempfile.NamedTemporaryFile(delete=False) as tmp_zip_file: + with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp_zip_file: + tmp_zip_file_name 
= tmp_zip_file.name with zipfile.ZipFile(tmp_zip_file, "w", zipfile.ZIP_DEFLATED) as archive: for root, dirs, files in os.walk(source_directory): dirs[:] = [d for d in dirs if d not in exclude_dirs] @@ -124,11 +125,12 @@ def upload_zip_archive(self, bucket, analysis_id, source_directory, exclude_dirs relative_path = os.path.relpath(local_path, source_directory) archive.write(local_path, relative_path) - log.debug("Uploading '%s' to '%s'", tmp_zip_file.name, blob_name) + try: + log.debug("Uploading '%s' to '%s'", tmp_zip_file_name, blob_name) blob = bucket.blob(blob_name) - blob.upload_from_filename(tmp_zip_file.name) - - os.unlink(tmp_zip_file.name) + blob.upload_from_filename(tmp_zip_file_name) + finally: + os.unlink(tmp_zip_file_name) log.info("Successfully uploaded archive for analysis %d to GCS.", analysis_id) def upload_files_individually(self, bucket, analysis_id, source_directory, exclude_dirs, exclude_files): From 83e346b27689125ae0066d7a66fa743456dd1e2a Mon Sep 17 00:00:00 2001 From: doomedraven Date: Tue, 7 Oct 2025 10:57:30 +0000 Subject: [PATCH 5/5] gcs mode --- modules/reporting/gcs.py | 44 ++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/modules/reporting/gcs.py b/modules/reporting/gcs.py index b30d022a1a7..ff2fc56ece1 100644 --- a/modules/reporting/gcs.py +++ b/modules/reporting/gcs.py @@ -106,6 +106,21 @@ def run(self, results): except Exception as e: raise CuckooReportError(f"Failed to upload report to GCS: {e}") from e + def _iter_files_to_upload(self, source_directory, exclude_dirs, exclude_files): + """Generator that yields files to be uploaded, skipping excluded ones.""" + for root, dirs, files in os.walk(source_directory): + # Exclude specified directories + dirs[:] = [d for d in dirs if d not in exclude_dirs] + for filename in files: + # Exclude specified files + if filename in exclude_files: + log.debug("Skipping excluded file: %s", os.path.join(root, filename)) + continue + + local_path 
= os.path.join(root, filename) + relative_path = os.path.relpath(local_path, source_directory) + yield local_path, relative_path + def upload_zip_archive(self, bucket, analysis_id, source_directory, exclude_dirs, exclude_files): """Compresses and uploads the analysis directory as a single zip file.""" log.debug("Compressing and uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket.name) @@ -115,15 +130,8 @@ def upload_zip_archive(self, bucket, analysis_id, source_directory, exclude_dirs with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp_zip_file: tmp_zip_file_name = tmp_zip_file.name with zipfile.ZipFile(tmp_zip_file, "w", zipfile.ZIP_DEFLATED) as archive: - for root, dirs, files in os.walk(source_directory): - dirs[:] = [d for d in dirs if d not in exclude_dirs] - for filename in files: - if filename in exclude_files: - log.debug("Skipping excluded file: %s", os.path.join(root, filename)) - continue - local_path = os.path.join(root, filename) - relative_path = os.path.relpath(local_path, source_directory) - archive.write(local_path, relative_path) + for local_path, relative_path in self._iter_files_to_upload(source_directory, exclude_dirs, exclude_files): + archive.write(local_path, relative_path) try: log.debug("Uploading '%s' to '%s'", tmp_zip_file_name, blob_name) @@ -136,18 +144,10 @@ def upload_zip_archive(self, bucket, analysis_id, source_directory, exclude_dirs def upload_files_individually(self, bucket, analysis_id, source_directory, exclude_dirs, exclude_files): """Uploads analysis files individually to the GCS bucket.""" log.debug("Uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket.name) - for root, dirs, files in os.walk(source_directory): - dirs[:] = [d for d in dirs if d not in exclude_dirs] - for filename in files: - if filename in exclude_files: - log.debug("Skipping excluded file: %s", os.path.join(root, filename)) - continue - local_path = os.path.join(root, filename) - relative_path 
= os.path.relpath(local_path, source_directory) - blob_name = f"{analysis_id}/{relative_path}" - - log.debug("Uploading '%s' to '%s'", local_path, blob_name) - blob = bucket.blob(blob_name) - blob.upload_from_filename(local_path) + for local_path, relative_path in self._iter_files_to_upload(source_directory, exclude_dirs, exclude_files): + blob_name = f"{analysis_id}/{relative_path}" + log.debug("Uploading '%s' to '%s'", local_path, blob_name) + blob = bucket.blob(blob_name) + blob.upload_from_filename(local_path) log.info("Successfully uploaded files for analysis %d to GCS.", analysis_id)