import os
import logging
from lib.cuckoo.common.constants import CUCKOO_ROOT
from lib.cuckoo.common.abstracts import Report
from lib.cuckoo.common.exceptions import CuckooReportError

# Set up a logger for this module
log = logging.getLogger(__name__)

try:
    # Import the Google Cloud Storage client library
    from google.cloud import storage
    from google.oauth2 import service_account

    HAVE_GCS = True
except ImportError:
    HAVE_GCS = False


class GCS(Report):
    """
    Uploads all analysis files to a Google Cloud Storage (GCS) bucket.
    """

    # Disabled by default; enable the gcs section in reporting.conf to use it.
    # The high order value makes this module run after the other reporting modules.
    order = 9999

    def run(self, results):
        """
        Run the Report module.

        Args:
            results (dict): The analysis results dictionary.
        """
        # Ensure the required library is installed
        if not HAVE_GCS:
            log.error(
                "Failed to run GCS reporting module: the 'google-cloud-storage' "
                "library is not installed. Please run 'poetry run pip install google-cloud-storage'."
            )
            return

        # Read configuration options from reporting.conf -> gcs and validate them
        bucket_name = self.options.get("bucket_name")
        if not bucket_name:
            raise CuckooReportError("GCS bucket_name is not configured in reporting.conf -> gcs")

        credentials_path_str = self.options.get("credentials_path")
        if not credentials_path_str:
            raise CuckooReportError("GCS credentials_path is not configured in reporting.conf -> gcs")

        credentials_path = os.path.join(CUCKOO_ROOT, credentials_path_str)
        if not os.path.isfile(credentials_path):
            raise CuckooReportError(
                f"GCS credentials_path '{credentials_path}' is invalid or the file does not exist (reporting.conf -> gcs)"
            )

        # Read the exclusion lists, defaulting to empty strings
        exclude_dirs_str = self.options.get("exclude_dirs", "")
        exclude_files_str = self.options.get("exclude_files", "")

        # Parse the exclusion strings into sets for efficient lookups.
        # The `if item.strip()` guard drops empty entries left by trailing commas.
        exclude_dirs = {item.strip() for item in exclude_dirs_str.split(",") if item.strip()}
        exclude_files = {item.strip() for item in exclude_files_str.split(",") if item.strip()}

        if exclude_dirs:
            log.debug("GCS reporting will exclude directories: %s", exclude_dirs)
        if exclude_files:
            log.debug("GCS reporting will exclude files: %s", exclude_files)

        try:
            # --- Authentication ---
            log.debug("Authenticating with Google Cloud Storage...")
            credentials = service_account.Credentials.from_service_account_file(credentials_path)
            storage_client = storage.Client(credentials=credentials)
            bucket = storage_client.bucket(bucket_name)

            # Check that the bucket exists and is accessible
            if not bucket.exists():
                raise CuckooReportError(
                    f"The specified GCS bucket '{bucket_name}' does not exist or you don't have permission to access it."
                )

            # --- File Upload ---
            # Use the analysis ID as a "folder" (object name prefix) in the bucket
            analysis_id = results.get("info", {}).get("id")
            if not analysis_id:
                raise CuckooReportError("Could not get analysis ID from results.")

            log.debug("Uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket_name)

            # self.analysis_path is the path to the analysis results directory,
            # e.g. /opt/cape/storage/analyses/123/
            source_directory = self.analysis_path

            for root, dirs, files in os.walk(source_directory):
                # Modify 'dirs' in place so os.walk does not descend into excluded
                # directories; this skips entire directory trees efficiently.
                dirs[:] = [d for d in dirs if d not in exclude_dirs]

                for filename in files:
                    # Skip files that match the exclusion list
                    if filename in exclude_files:
                        log.debug("Skipping excluded file: %s", os.path.join(root, filename))
                        continue  # Skip to the next file

                    local_path = os.path.join(root, filename)
                    relative_path = os.path.relpath(local_path, source_directory)
                    blob_name = f"{analysis_id}/{relative_path}"

                    log.debug("Uploading '%s' to '%s'", local_path, blob_name)

                    blob = bucket.blob(blob_name)
                    blob.upload_from_filename(local_path)

            log.info("Successfully uploaded files for analysis %d to GCS.", analysis_id)

        except CuckooReportError:
            # Let configuration and validation errors propagate unchanged
            raise
        except Exception as e:
            raise CuckooReportError(f"Failed to upload report to GCS: {e}") from e
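
For reference, here is a minimal sketch of the reporting.conf stanza this module reads through self.options. The option names (bucket_name, credentials_path, exclude_dirs, exclude_files) come straight from the code above; the concrete values, and the assumption that the section is toggled with the usual enabled flag, are illustrative only:

    [gcs]
    enabled = yes
    # Name of the target bucket (illustrative value)
    bucket_name = my-cape-analyses
    # Path to the service-account JSON key, relative to CUCKOO_ROOT
    credentials_path = conf/gcs-service-account.json
    # Comma-separated directory and file names to skip during upload
    exclude_dirs = memory,logs
    exclude_files = memory.dmp

With a configuration like this, every file under storage/analyses/<id>/ (minus the exclusions) is uploaded under the object prefix <id>/ in the bucket, so the bucket mirrors the local analysis directory layout.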