import os
import logging
from lib.cuckoo.common.constants import CUCKOO_ROOT
from lib.cuckoo.common.abstracts import Report
from lib.cuckoo.common.exceptions import CuckooReportError

# Set up a logger for this module
log = logging.getLogger(__name__)

try:
    # Import the Google Cloud Storage client library
    from google.cloud import storage
    from google.oauth2 import service_account

    HAVE_GCS = True
except ImportError:
    HAVE_GCS = False


class GCS(Report):
    """
    Uploads all analysis files to a Google Cloud Storage (GCS) bucket.
    """

    # Disabled by default; enable the gcs section in reporting.conf to use it.
    # The high order value makes this module run after the other reporting modules.
    order = 9999

    def run(self, results):
        """
        Run the Report module.

        Args:
            results (dict): The analysis results dictionary.
        """
        # Ensure the required library is installed
        if not HAVE_GCS:
            log.error(
                "Failed to run GCS reporting module: the 'google-cloud-storage' "
                "library is not installed. Please run 'poetry run pip install google-cloud-storage'."
            )
            return

        # Read configuration options from reporting.conf -> gcs and validate them
        bucket_name = self.options.get("bucket_name")
        if not bucket_name:
            raise CuckooReportError("GCS bucket_name is not configured in reporting.conf -> gcs")

        credentials_path_str = self.options.get("credentials_path")
        if not credentials_path_str:
            raise CuckooReportError("GCS credentials_path is not configured in reporting.conf -> gcs")

        credentials_path = os.path.join(CUCKOO_ROOT, credentials_path_str)
        if not os.path.isfile(credentials_path):
            raise CuckooReportError(
                f"GCS credentials_path '{credentials_path}' is invalid or the file does not exist (reporting.conf -> gcs)"
            )

        # Read the exclusion lists, defaulting to empty strings
        exclude_dirs_str = self.options.get("exclude_dirs", "")
        exclude_files_str = self.options.get("exclude_files", "")

        # Parse the exclusion strings into sets for efficient lookups.
        # The `if item.strip()` guard drops empty entries left by trailing commas.
        exclude_dirs = {item.strip() for item in exclude_dirs_str.split(",") if item.strip()}
        exclude_files = {item.strip() for item in exclude_files_str.split(",") if item.strip()}

        if exclude_dirs:
            log.debug("GCS reporting will exclude directories: %s", exclude_dirs)
        if exclude_files:
            log.debug("GCS reporting will exclude files: %s", exclude_files)

        try:
            # --- Authentication ---
            log.debug("Authenticating with Google Cloud Storage...")
            credentials = service_account.Credentials.from_service_account_file(credentials_path)
            storage_client = storage.Client(credentials=credentials)
            bucket = storage_client.bucket(bucket_name)

            # Check that the bucket exists and is accessible
            if not bucket.exists():
                raise CuckooReportError(
                    f"The specified GCS bucket '{bucket_name}' does not exist or you don't have permission to access it."
                )

            # --- File Upload ---
            # Use the analysis ID as a "folder" (object name prefix) in the bucket
            analysis_id = results.get("info", {}).get("id")
            if not analysis_id:
                raise CuckooReportError("Could not get analysis ID from results.")

            log.debug("Uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket_name)

            # self.analysis_path is the path to the analysis results directory,
            # e.g. /opt/cape/storage/analyses/123/
            source_directory = self.analysis_path

            for root, dirs, files in os.walk(source_directory):
                # Modify 'dirs' in place so os.walk does not descend into excluded
                # directories; this skips entire directory trees efficiently.
                dirs[:] = [d for d in dirs if d not in exclude_dirs]

                for filename in files:
                    # Skip files that match the exclusion list
                    if filename in exclude_files:
                        log.debug("Skipping excluded file: %s", os.path.join(root, filename))
                        continue  # Skip to the next file

                    local_path = os.path.join(root, filename)
                    relative_path = os.path.relpath(local_path, source_directory)
                    blob_name = f"{analysis_id}/{relative_path}"

                    log.debug("Uploading '%s' to '%s'", local_path, blob_name)

                    blob = bucket.blob(blob_name)
                    blob.upload_from_filename(local_path)

            log.info("Successfully uploaded files for analysis %d to GCS.", analysis_id)

        except CuckooReportError:
            # Let configuration and validation errors propagate unchanged
            raise
        except Exception as e:
            raise CuckooReportError(f"Failed to upload report to GCS: {e}") from e
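
For reference, here is a minimal sketch of the reporting.conf stanza this module reads through self.options. The option names (bucket_name, credentials_path, exclude_dirs, exclude_files) come straight from the code above; the concrete values, and the assumption that the section is toggled with the usual enabled flag, are illustrative only:

    [gcs]
    enabled = yes
    # Name of the target bucket (illustrative value)
    bucket_name = my-cape-analyses
    # Path to the service-account JSON key, relative to CUCKOO_ROOT
    credentials_path = conf/gcs-service-account.json
    # Comma-separated directory and file names to skip during upload
    exclude_dirs = memory,logs
    exclude_files = memory.dmp

With a configuration like this, every file under storage/analyses/<id>/ (minus the exclusions) is uploaded under the object prefix <id>/ in the bucket, so the bucket mirrors the local analysis directory layout.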