kevoreilly
diff --git a/‎conf/default/cuckoo.conf.default‎
Lines changed: 4 additions & 1 deletion b/‎conf/default/cuckoo.conf.default‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎conf/default/web.conf.default‎
Lines changed: 1 addition & 0 deletions b/‎conf/default/web.conf.default‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎extra/libvirt_installer.sh‎
Lines changed: 1 addition & 1 deletion b/‎extra/libvirt_installer.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎installer/kvm-qemu.sh‎
Lines changed: 1 addition & 1 deletion b/‎installer/kvm-qemu.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎lib/cuckoo/common/cape_utils.py‎
Lines changed: 79 additions & 19 deletions b/‎lib/cuckoo/common/cape_utils.py‎
Lines changed: 79 additions & 19 deletions
diff --git a/‎lib/cuckoo/common/dotnet_utils.py‎
Lines changed: 16 additions & 1 deletion b/‎lib/cuckoo/common/dotnet_utils.py‎
Lines changed: 16 additions & 1 deletion
diff --git a/‎lib/cuckoo/common/extractor_utils.py‎
Lines changed: 66 additions & 1 deletion b/‎lib/cuckoo/common/extractor_utils.py‎
Lines changed: 66 additions & 1 deletion
diff --git a/‎lib/cuckoo/common/fraunhofer_helper.py‎
Lines changed: 10 additions & 0 deletions b/‎lib/cuckoo/common/fraunhofer_helper.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎lib/cuckoo/common/hypervisor_config.py‎
Lines changed: 19 additions & 0 deletions b/‎lib/cuckoo/common/hypervisor_config.py‎
Lines changed: 19 additions & 0 deletions
@@ -1,5 +1,8 @@
 [cuckoo]
 
+# Ignore signals: CAPE will quit immediately instead of waiting for jobs to finish
+ignore_signals = yes
+
 # Which category of tasks do you want to analyze?
 categories = static, pcap, url, file
 
@@ -35,7 +38,7 @@ scaling_semaphore_update_timer = 10
 
 # Specify a timeout for tasks, useful if you are bound to timely reports awaited by users
 task_timeout = off
-task_pending_timeout = 0 
+task_pending_timeout = 0
 task_timeout_scan_interval = 30
 
 # Enable creation of memory dump of the analysis machine before shutting
 
@@ -227,6 +227,7 @@ virustotal = no
 # here is an Intelligence API key, not a Public API key
 vtkey =
 malwarebazaar = no
+malwarebazaar_apikey =
 
 [yara_detail]
 enabled = no
@@ -4,7 +4,7 @@ set -ex
 # run this via...
 # cd /opt/CAPEv2/ ; sudo -u cape /etc/poetry/bin/poetry run extra/libvirt_installer.sh
 
-LIB_VERSION=10.10.0
+LIB_VERSION=11.0.0
 cd /tmp || return
 
 if [ ! -f v${LIB_VERSION}.zip ]; then
 
@@ -59,7 +59,7 @@ QTARGETS="--target-list=i386-softmmu,x86_64-softmmu,i386-linux-user,x86_64-linux
 qemu_version=9.2.0
 # libvirt - https://libvirt.org/sources/
 # changelog - https://libvirt.org/news.html
-libvirt_version=10.10.0
+libvirt_version=11.0.0
 # virt-manager - https://github.com/virt-manager/virt-manager/releases
 # autofilled
 OS=""
 
@@ -116,7 +116,7 @@
 BUFSIZE = int(cfg.processing.analysis_size_limit)
 
 
-def hash_file(method, path):
+def hash_file(method, path: str) -> str:
     """Calculates a hash on a file by path.
     @param method: callable hashing method
     @param path: file path
@@ -143,6 +143,17 @@ def convert(data):
 
 
 def is_duplicated_binary(file_info: dict, cape_file: dict, append_file: bool) -> bool:
+    """
+    Determines if a binary file is a duplicate based on various criteria.
+
+    Args:
+        file_info (dict): Information about the file being checked.
+        cape_file (dict): Information about the existing CAPE file.
+        append_file (bool): Flag indicating whether to append the file.
+
+    Returns:
+        bool: False if the file is determined to be a duplicate, otherwise returns the value of append_file.
+    """
     if HAVE_PYDEEP:
         ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode(), cape_file["ssdeep"].encode())
         if ssdeep_grade >= ssdeep_threshold:
@@ -162,9 +173,25 @@ def is_duplicated_binary(file_info: dict, cape_file: dict, append_file: bool) ->
     return append_file
 
 
-def static_config_parsers(cape_name, file_path, file_data):
+def static_config_parsers(cape_name: str, file_path: str, file_data: bytes) -> dict:
+    """
+    Process CAPE Yara hits and extract configuration data using various parsers.
+
+    This function attempts to extract configuration data from a given file using different parsers
+    such as CAPE extractors, DC3-MWCP, and Malwareconfigs. The function returns a dictionary containing
+    the extracted configuration data.
+
+    Args:
+        cape_name (str): The name of the CAPE parser to use.
+        file_path (str): The path to the file being analyzed.
+        file_data (bytes): The binary data of the file being analyzed.
+
+    Returns:
+        dict: A dictionary containing the extracted configuration data. If no configuration data is
+            extracted, an empty dictionary is returned.
+    """
     """Process CAPE Yara hits"""
-    cape_config = {cape_name: {}}
+    cape_config = {}
     parser_loaded = False
     # CAPE - pure python parsers
     # MWCP
@@ -184,14 +211,14 @@ def static_config_parsers(cape_name, file_path, file_data):
                     # python3 map object returns iterator by default, not list and not serializeable in JSON.
                     if isinstance(value, map):
                         value = list(value)
-                    cape_config[cape_name].update({key: [value]})
+                    cape_config.setdefault(cape_name, {}).update({key: [value]})
                 parser_loaded = True
             elif isinstance(cape_configraw, dict):
                 for key, value in cape_configraw.items():
                     # python3 map object returns iterator by default, not list and not serializeable in JSON.
                     if isinstance(value, map):
                         value = list(value)
-                    cape_config[cape_name].update({key: [value]})
+                    cape_config.setdefault(cape_name, {}).update({key: [value]})
                 parser_loaded = True
         except Exception as e:
             log.exception("CAPE: parsing error on %s with %s: %s", file_path, cape_name, e)
@@ -215,7 +242,7 @@ def static_config_parsers(cape_name, file_path, file_data):
                     del reportmeta["other"]
 
                 tmp_dict.update(reportmeta)
-                cape_config[cape_name] = convert(tmp_dict)
+                cape_config.setdefault(cape_name, {}).update(convert(tmp_dict))
                 log.debug("CAPE: DC3-MWCP parser for %s completed", cape_name)
             else:
                 error_lines = report.errors[0].split("\n")
@@ -252,10 +279,10 @@ def static_config_parsers(cape_name, file_path, file_data):
                 # ToDo remove
                 if isinstance(malwareconfig_config, list):
                     for key, value in malwareconfig_config[0].items():
-                        cape_config[cape_name].update({key: [value]})
+                        cape_config.setdefault(cape_name, {}).update({key: [value]})
                 elif isinstance(malwareconfig_config, dict):
                     for key, value in malwareconfig_config.items():
-                        cape_config[cape_name].update({key: [value]})
+                        cape_config.setdefault(cape_name, {}).update({key: [value]})
         except Exception as e:
             if "rules" in str(e):
                 log.warning("You probably need to compile yara-python with dotnet support")
@@ -267,9 +294,6 @@ def static_config_parsers(cape_name, file_path, file_data):
                     cape_name,
                     str(e),
                 )
-
-        if cape_config.get(cape_name) == {}:
-            return {}
     """
     elif HAVE_MALDUCK and not parser_loaded and cape_name.lower() in malduck_modules_names:
         log.debug("Running Malduck on %s", file_path)
@@ -290,14 +314,26 @@ def static_config_parsers(cape_name, file_path, file_data):
         del ext
         if tmp_config:
             for key, value in tmp_config[0].items():
-                cape_config[cape_name].update({key: [value]})
+                cape_config.setdefault(cape_name, {}).update({key: [value]})
     """
-    if not cape_config[cape_name]:
-        return {}
+
     return cape_config
 
 
-def static_config_lookup(file_path, sha256=False):
+def static_config_lookup(file_path: str, sha256: str = False) -> dict:
+    """
+    Look up static configuration information for a given file based on its SHA-256 hash.
+
+    This function calculates the SHA-256 hash of the file at the specified path if not provided,
+    and then queries either a MongoDB or Elasticsearch database to retrieve configuration information.
+
+    Args:
+        file_path (str): The path to the file for which to look up configuration information.
+        sha256 (str, optional): The SHA-256 hash of the file. If not provided, it will be calculated.
+
+    Returns:
+        dict or None: A dictionary containing the configuration information if found, otherwise None.
+    """
     if not sha256:
         sha256 = hashlib.sha256(open(file_path, "rb").read()).hexdigest()
 
@@ -327,13 +363,26 @@ def static_config_lookup(file_path, sha256=False):
 named_static_extractors = []
 
 
-def static_extraction(path):
-    config = False
+def static_extraction(path: str) -> dict:
+    """
+    Extracts static configuration from a file using YARA rules and named static extractors.
+
+    Args:
+        path (str): The file path to be analyzed.
+
+    Returns:
+        dict: The extracted configuration as a dictionary if successful,
+                    an empty dict if no configuration is found or an error occurs.
+
+    Raises:
+        Exception: Logs any exceptions that occur during the extraction process.
+    """
+    config = {}
     try:
         hits = File(path).get_yara(category="CAPE")
         path_name = Path(path).name
         if not hits and path_name not in named_static_extractors:
-            return False
+            return config
         file_data = path_read_file(path)
         if path_name in named_static_extractors:
             config = static_config_parsers(path_name, path, file_data)
@@ -349,7 +398,18 @@ def static_extraction(path):
     return config
 
 
-def cape_name_from_yara(details, pid, results):
+def cape_name_from_yara(details: dict, pid: int, results: dict) -> str:
+    """
+    Extracts the CAPE name from YARA hit details and associates it with a process ID (pid) in the results dictionary.
+
+    Args:
+        details (dict): A dictionary containing YARA hit details, expected to have a key "cape_yara" with a list of hits.
+        pid (int): The process ID to associate the CAPE name with.
+        results (dict): A dictionary to store the association between detections and process IDs.
+
+    Returns:
+        str: The CAPE name extracted from the YARA hit, or None if no CAPE name is found.
+    """
     for hit in details.get("cape_yara", []) or []:
         if File.yara_hit_provides_detection(hit):
             if "detections2pid" not in results:
 
@@ -13,7 +13,22 @@
 log = logging.getLogger("dotnet_utils")
 
 
-def dotnet_user_strings(file: str = False, data: bytes = False, dn_whitelisting: list = []):
+def dotnet_user_strings(file: str = False, data: bytes = False, dn_whitelisting: list = []) -> list:
+    """
+    Extracts user strings from a .NET file or data blob using dnfile.
+
+    Args:
+        file (str): Path to the .NET file. Default is False.
+        data (bytes): Byte data of the .NET file. Default is False.
+        dn_whitelisting (list): List of string patterns to whitelist. Default is an empty list.
+
+    Returns:
+        list: A list of extracted user strings that are not in the whitelist.
+
+    Raises:
+        Exception: If there is an error processing the .NET file or data.
+    """
+
     if not HAVE_DNFILE:
         return []
 
 
@@ -5,6 +5,19 @@
 
 # dotnet
 def get_mdtoken(data: bytes) -> int:
+    """
+    Extracts a metadata token from the given byte data.
+
+    The function interprets the first 4 bytes of the input data as an unsigned
+    integer in little-endian format and then masks it with 0xFFFFFF to obtain
+    the metadata token.
+
+    Args:
+        data (bytes): The byte data from which to extract the metadata token.
+
+    Returns:
+        int: The extracted metadata token.
+    """
     return struct.unpack_from("<I", data)[0] & 0xFFFFFF
 
 
@@ -15,6 +28,20 @@ def get_data_offset(pe: pefile.PE, string_offset: int, addr: int) -> int:
 
 def calc_section_alignment(pe: pefile.PE, offset: int, addr: int) -> int:
     """
+    Calculate the alignment between two sections in a PE file.
+
+    Args:
+        pe (pefile.PE): The PE file object.
+        offset (int): The offset value, typically calculated as
+                    struct.unpack("i", blob[0x43:0x47])[0] + 0x47.
+        addr (int): The address where data starts, which can be a YARA address match.
+
+    Returns:
+        int: The calculated alignment between the sections. Returns 0 if sections are not found or an error occurs.
+
+    Raises:
+        Exception: If an error occurs during the calculation, it will be caught and printed.
+
     offset is: Ex struct.unpack("i", blob[0x43:0x47])[0] + 0x47
     addr is where data starts, can be YARA address match
     """
@@ -31,7 +58,20 @@ def calc_section_alignment(pe: pefile.PE, offset: int, addr: int) -> int:
     return alignment
 
 
-def function_offset_from_VA(addr, blob, pe):
+def function_offset_from_VA(addr: int, blob: bytes, pe: pefile.PE):
+    """
+    Calculate the function offset from a given virtual address (VA) in a PE file.
+
+    Args:
+        addr (int): The virtual address to start from.
+        blob (bytes): The binary data blob containing the instructions.
+        pe (PE): The PE file object, typically from the pefile module.
+
+    Returns:
+        tuple: A tuple containing:
+            - function_addr (int): The calculated function address.
+            - offset (int): The offset of the next instruction after the function call.
+    """
     shift_pos = blob.find(b"\xE8") + 1
     function_addr = pe.get_rva_from_offset(addr + shift_pos) + pe.OPTIONAL_HEADER.ImageBase
     # print(f"Getting offset for function: {hex(function_addr)}")
@@ -41,6 +81,19 @@ def function_offset_from_VA(addr, blob, pe):
 
 
 def function_offset_from_offset(addr: int, binary: bytes, pe: pefile.PE):
+    """
+    Calculates the virtual address and file offset of a subfunction call within a binary.
+
+    Args:
+        addr (int): The starting address to search for the CALL instruction.
+        binary (bytes): The binary data of the executable.
+        pe (pefile.PE): The PE file object representing the executable.
+
+    Returns:
+        tuple: A tuple containing:
+            - call_virtual_address (int): The virtual address of the CALL instruction.
+            - subfunc_file_offset (int): The file offset of the subfunction being called.
+    """
     # where our subcall starts - example: 8
     shift_pos = binary[addr:].find(b"\xE8")
     call_file_offset = addr + shift_pos
@@ -56,6 +109,18 @@ def function_offset_from_offset(addr: int, binary: bytes, pe: pefile.PE):
 
 
 def find_function_xrefs(data, start, end):
+    """
+    Finds function cross-references (xrefs) within a specified range in the given binary data.
+
+    Args:
+        data (bytes): The binary data to search for function xrefs.
+        start (int): The starting address (inclusive) of the range to search.
+        end (int): The ending address (exclusive) of the range to search.
+
+    Returns:
+        dict: A dictionary where keys are target addresses of CALL instructions and values are lists of addresses
+            where these CALL instructions are located.
+    """
     function_xrefs = {}
     # The re.finditer function only finds *non-overlapping* matches, which fails to find some CALL instructions
     for rva in range(start, end):
 
@@ -22,6 +22,16 @@
 
 
 def get_dga_lookup_dict():
+    """
+    Retrieves the DGA (Domain Generation Algorithm) lookup dictionary from a gzipped JSON file.
+
+    The function constructs the file path to the DGA lookup dictionary, checks if the file exists,
+    and if it does, reads and decompresses the file, then loads its contents as a JSON object.
+    If the file does not exist, it returns an empty dictionary.
+
+    Returns:
+        dict: The DGA lookup dictionary if the file exists, otherwise an empty dictionary.
+    """
     dga_lookup_path = os.path.join(CUCKOO_ROOT, "data", "dga_lookup_dict.json.gz")
     if path_exists(dga_lookup_path):
         with gzip.GzipFile(dga_lookup_path, "r") as fin:
 
@@ -8,6 +8,25 @@
 
 
 def proxmox_shutdown_vm(machineName: str):
+    """
+    Shuts down a virtual machine on a Proxmox server.
+
+    Args:
+        machineName (str): The name of the virtual machine to shut down.
+
+    Raises:
+        Exception: If there is an error during the shutdown process.
+
+    Notes:
+        - This function does not support multiple Proxmox servers.
+        - The Proxmox server configuration is expected to be available in the `proxmox_conf` object.
+        - The function retrieves the VM ID from the `proxmox_conf.Node_1` configuration using the provided machine name.
+        - The function sends a POST request to the Proxmox API to obtain an authentication ticket and CSRF prevention token.
+        - The function then sends another POST request to shut down the specified virtual machine.
+        - If the shutdown is successful, a message is printed to indicate success.
+        - If an error occurs, it is caught and printed.
+    """
+
     proxmox_server = proxmox_conf.proxmox.hostname
     # Not supporting multiple servers
     nodes = proxmox_conf.proxmox.nodes