Skip to content

Commit b392b01

Browse files
committed
Merge branch 'master' into staging
2 parents 54de070 + 6bf8860 commit b392b01

26 files changed

+971
-101
lines changed

conf/default/cuckoo.conf.default

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
[cuckoo]
22

3+
# Ignore signals: CAPE will quit immediately instead of waiting for jobs to finish
4+
ignore_signals = yes
5+
36
# Which category of tasks do you want to analyze?
47
categories = static, pcap, url, file
58

@@ -35,7 +38,7 @@ scaling_semaphore_update_timer = 10
3538

3639
# Specify a timeout for tasks, useful if you are bound to timely reports awaited by users
3740
task_timeout = off
38-
task_pending_timeout = 0
41+
task_pending_timeout = 0
3942
task_timeout_scan_interval = 30
4043

4144
# Enable creation of memory dump of the analysis machine before shutting

conf/default/web.conf.default

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ virustotal = no
227227
# The key here is an Intelligence API key, not a Public API key
228228
vtkey =
229229
malwarebazaar = no
230+
malwarebazaar_apikey =
230231

231232
[yara_detail]
232233
enabled = no

extra/libvirt_installer.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ set -ex
44
# run this via...
55
# cd /opt/CAPEv2/ ; sudo -u cape /etc/poetry/bin/poetry run extra/libvirt_installer.sh
66

7-
LIB_VERSION=10.10.0
7+
LIB_VERSION=11.0.0
88
cd /tmp || return
99

1010
if [ ! -f v${LIB_VERSION}.zip ]; then

installer/kvm-qemu.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ QTARGETS="--target-list=i386-softmmu,x86_64-softmmu,i386-linux-user,x86_64-linux
5959
qemu_version=9.2.0
6060
# libvirt - https://libvirt.org/sources/
6161
# changelog - https://libvirt.org/news.html
62-
libvirt_version=10.10.0
62+
libvirt_version=11.0.0
6363
# virt-manager - https://github.com/virt-manager/virt-manager/releases
6464
# autofilled
6565
OS=""

lib/cuckoo/common/cape_utils.py

Lines changed: 79 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@
116116
BUFSIZE = int(cfg.processing.analysis_size_limit)
117117

118118

119-
def hash_file(method, path):
119+
def hash_file(method, path: str) -> str:
120120
"""Calculates an hash on a file by path.
121121
@param method: callable hashing method
122122
@param path: file path
@@ -143,6 +143,17 @@ def convert(data):
143143

144144

145145
def is_duplicated_binary(file_info: dict, cape_file: dict, append_file: bool) -> bool:
146+
"""
147+
Determines if a binary file is a duplicate based on various criteria.
148+
149+
Args:
150+
file_info (dict): Information about the file being checked.
151+
cape_file (dict): Information about the existing CAPE file.
152+
append_file (bool): Flag indicating whether to append the file.
153+
154+
Returns:
155+
bool: False if the file is determined to be a duplicate, otherwise returns the value of append_file.
156+
"""
146157
if HAVE_PYDEEP:
147158
ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode(), cape_file["ssdeep"].encode())
148159
if ssdeep_grade >= ssdeep_threshold:
@@ -162,9 +173,25 @@ def is_duplicated_binary(file_info: dict, cape_file: dict, append_file: bool) ->
162173
return append_file
163174

164175

165-
def static_config_parsers(cape_name, file_path, file_data):
176+
def static_config_parsers(cape_name: str, file_path: str, file_data: bytes) -> dict:
177+
"""
178+
Process CAPE Yara hits and extract configuration data using various parsers.
179+
180+
This function attempts to extract configuration data from a given file using different parsers
181+
such as CAPE extractors, DC3-MWCP, and Malwareconfigs. The function returns a dictionary containing
182+
the extracted configuration data.
183+
184+
Args:
185+
cape_name (str): The name of the CAPE parser to use.
186+
file_path (str): The path to the file being analyzed.
187+
file_data (bytes): The binary data of the file being analyzed.
188+
189+
Returns:
190+
dict: A dictionary containing the extracted configuration data. If no configuration data is
191+
extracted, an empty dictionary is returned.
192+
"""
166193
"""Process CAPE Yara hits"""
167-
cape_config = {cape_name: {}}
194+
cape_config = {}
168195
parser_loaded = False
169196
# CAPE - pure python parsers
170197
# MWCP
@@ -184,14 +211,14 @@ def static_config_parsers(cape_name, file_path, file_data):
184211
# python3 map object returns iterator by default, not list and not serializable in JSON.
185212
if isinstance(value, map):
186213
value = list(value)
187-
cape_config[cape_name].update({key: [value]})
214+
cape_config.setdefault(cape_name, {}).update({key: [value]})
188215
parser_loaded = True
189216
elif isinstance(cape_configraw, dict):
190217
for key, value in cape_configraw.items():
191218
# python3 map object returns iterator by default, not list and not serializable in JSON.
192219
if isinstance(value, map):
193220
value = list(value)
194-
cape_config[cape_name].update({key: [value]})
221+
cape_config.setdefault(cape_name, {}).update({key: [value]})
195222
parser_loaded = True
196223
except Exception as e:
197224
log.exception("CAPE: parsing error on %s with %s: %s", file_path, cape_name, e)
@@ -215,7 +242,7 @@ def static_config_parsers(cape_name, file_path, file_data):
215242
del reportmeta["other"]
216243

217244
tmp_dict.update(reportmeta)
218-
cape_config[cape_name] = convert(tmp_dict)
245+
cape_config.setdefault(cape_name, {}).update(convert(tmp_dict))
219246
log.debug("CAPE: DC3-MWCP parser for %s completed", cape_name)
220247
else:
221248
error_lines = report.errors[0].split("\n")
@@ -252,10 +279,10 @@ def static_config_parsers(cape_name, file_path, file_data):
252279
# ToDo remove
253280
if isinstance(malwareconfig_config, list):
254281
for key, value in malwareconfig_config[0].items():
255-
cape_config[cape_name].update({key: [value]})
282+
cape_config.setdefault(cape_name, {}).update({key: [value]})
256283
elif isinstance(malwareconfig_config, dict):
257284
for key, value in malwareconfig_config.items():
258-
cape_config[cape_name].update({key: [value]})
285+
cape_config.setdefault(cape_name, {}).update({key: [value]})
259286
except Exception as e:
260287
if "rules" in str(e):
261288
log.warning("You probably need to compile yara-python with dotnet support")
@@ -267,9 +294,6 @@ def static_config_parsers(cape_name, file_path, file_data):
267294
cape_name,
268295
str(e),
269296
)
270-
271-
if cape_config.get(cape_name) == {}:
272-
return {}
273297
"""
274298
elif HAVE_MALDUCK and not parser_loaded and cape_name.lower() in malduck_modules_names:
275299
log.debug("Running Malduck on %s", file_path)
@@ -290,14 +314,26 @@ def static_config_parsers(cape_name, file_path, file_data):
290314
del ext
291315
if tmp_config:
292316
for key, value in tmp_config[0].items():
293-
cape_config[cape_name].update({key: [value]})
317+
cape_config.setdefault(cape_name, {}).update({key: [value]})
294318
"""
295-
if not cape_config[cape_name]:
296-
return {}
319+
297320
return cape_config
298321

299322

300-
def static_config_lookup(file_path, sha256=False):
323+
def static_config_lookup(file_path: str, sha256: str = False) -> dict:
324+
"""
325+
Look up static configuration information for a given file based on its SHA-256 hash.
326+
327+
This function calculates the SHA-256 hash of the file at the specified path if not provided,
328+
and then queries either a MongoDB or Elasticsearch database to retrieve configuration information.
329+
330+
Args:
331+
file_path (str): The path to the file for which to look up configuration information.
332+
sha256 (str, optional): The SHA-256 hash of the file. If not provided, it will be calculated.
333+
334+
Returns:
335+
dict or None: A dictionary containing the configuration information if found, otherwise None.
336+
"""
301337
if not sha256:
302338
sha256 = hashlib.sha256(open(file_path, "rb").read()).hexdigest()
303339

@@ -327,13 +363,26 @@ def static_config_lookup(file_path, sha256=False):
327363
named_static_extractors = []
328364

329365

330-
def static_extraction(path):
331-
config = False
366+
def static_extraction(path: str) -> dict:
367+
"""
368+
Extracts static configuration from a file using YARA rules and named static extractors.
369+
370+
Args:
371+
path (str): The file path to be analyzed.
372+
373+
Returns:
374+
dict: The extracted configuration as a dictionary if successful,
375+
an empty dictionary if no configuration is found.
376+
377+
Raises:
378+
Exception: Logs any exceptions that occur during the extraction process.
379+
"""
380+
config = {}
332381
try:
333382
hits = File(path).get_yara(category="CAPE")
334383
path_name = Path(path).name
335384
if not hits and path_name not in named_static_extractors:
336-
return False
385+
return config
337386
file_data = path_read_file(path)
338387
if path_name in named_static_extractors:
339388
config = static_config_parsers(path_name, path, file_data)
@@ -349,7 +398,18 @@ def static_extraction(path):
349398
return config
350399

351400

352-
def cape_name_from_yara(details, pid, results):
401+
def cape_name_from_yara(details: dict, pid: int, results: dict) -> str:
402+
"""
403+
Extracts the CAPE name from YARA hit details and associates it with a process ID (pid) in the results dictionary.
404+
405+
Args:
406+
details (dict): A dictionary containing YARA hit details, expected to have a key "cape_yara" with a list of hits.
407+
pid (int): The process ID to associate the CAPE name with.
408+
results (dict): A dictionary to store the association between detections and process IDs.
409+
410+
Returns:
411+
str: The CAPE name extracted from the YARA hit, or None if no CAPE name is found.
412+
"""
353413
for hit in details.get("cape_yara", []) or []:
354414
if File.yara_hit_provides_detection(hit):
355415
if "detections2pid" not in results:

lib/cuckoo/common/dotnet_utils.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,22 @@
1313
log = logging.getLogger("dotnet_utils")
1414

1515

16-
def dotnet_user_strings(file: str = False, data: bytes = False, dn_whitelisting: list = []):
16+
def dotnet_user_strings(file: str = False, data: bytes = False, dn_whitelisting: list = []) -> list:
17+
"""
18+
Extracts user strings from a .NET file or data blob using dnfile.
19+
20+
Args:
21+
file (str): Path to the .NET file. Default is False.
22+
data (bytes): Byte data of the .NET file. Default is False.
23+
dn_whitelisting (list): List of string patterns to whitelist. Default is an empty list.
24+
25+
Returns:
26+
list: A list of extracted user strings that are not in the whitelist.
27+
28+
Raises:
29+
Exception: If there is an error processing the .NET file or data.
30+
"""
31+
1732
if not HAVE_DNFILE:
1833
return []
1934

lib/cuckoo/common/extractor_utils.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,19 @@
55

66
# dotnet
77
def get_mdtoken(data: bytes) -> int:
8+
"""
9+
Extracts a metadata token from the given byte data.
10+
11+
The function interprets the first 4 bytes of the input data as an unsigned
12+
integer in little-endian format and then masks it with 0xFFFFFF to obtain
13+
the metadata token.
14+
15+
Args:
16+
data (bytes): The byte data from which to extract the metadata token.
17+
18+
Returns:
19+
int: The extracted metadata token.
20+
"""
821
return struct.unpack_from("<I", data)[0] & 0xFFFFFF
922

1023

@@ -15,6 +28,20 @@ def get_data_offset(pe: pefile.PE, string_offset: int, addr: int) -> int:
1528

1629
def calc_section_alignment(pe: pefile.PE, offset: int, addr: int) -> int:
1730
"""
31+
Calculate the alignment between two sections in a PE file.
32+
33+
Args:
34+
pe (pefile.PE): The PE file object.
35+
offset (int): The offset value, typically calculated as
36+
struct.unpack("i", blob[0x43:0x47])[0] + 0x47.
37+
addr (int): The address where data starts, which can be a YARA address match.
38+
39+
Returns:
40+
int: The calculated alignment between the sections. Returns 0 if sections are not found or an error occurs.
41+
42+
Raises:
43+
Exception: If an error occurs during the calculation, it will be caught and printed.
44+
1845
offset is: Ex struct.unpack("i", blob[0x43:0x47])[0] + 0x47
1946
addr is where data starts, can be YARA address match
2047
"""
@@ -31,7 +58,20 @@ def calc_section_alignment(pe: pefile.PE, offset: int, addr: int) -> int:
3158
return alignment
3259

3360

34-
def function_offset_from_VA(addr, blob, pe):
61+
def function_offset_from_VA(addr: int, blob: bytes, pe: pefile.PE):
62+
"""
63+
Calculate the function offset from a given virtual address (VA) in a PE file.
64+
65+
Args:
66+
addr (int): The virtual address to start from.
67+
blob (bytes): The binary data blob containing the instructions.
68+
pe (PE): The PE file object, typically from the pefile module.
69+
70+
Returns:
71+
tuple: A tuple containing:
72+
- function_addr (int): The calculated function address.
73+
- offset (int): The offset of the next instruction after the function call.
74+
"""
3575
shift_pos = blob.find(b"\xE8") + 1
3676
function_addr = pe.get_rva_from_offset(addr + shift_pos) + pe.OPTIONAL_HEADER.ImageBase
3777
# print(f"Getting offset for function: {hex(function_addr)}")
@@ -41,6 +81,19 @@ def function_offset_from_VA(addr, blob, pe):
4181

4282

4383
def function_offset_from_offset(addr: int, binary: bytes, pe: pefile.PE):
84+
"""
85+
Calculates the virtual address and file offset of a subfunction call within a binary.
86+
87+
Args:
88+
addr (int): The starting address to search for the CALL instruction.
89+
binary (bytes): The binary data of the executable.
90+
pe (pefile.PE): The PE file object representing the executable.
91+
92+
Returns:
93+
tuple: A tuple containing:
94+
- call_virtual_address (int): The virtual address of the CALL instruction.
95+
- subfunc_file_offset (int): The file offset of the subfunction being called.
96+
"""
4497
# where our subcall starts - example: 8
4598
shift_pos = binary[addr:].find(b"\xE8")
4699
call_file_offset = addr + shift_pos
@@ -56,6 +109,18 @@ def function_offset_from_offset(addr: int, binary: bytes, pe: pefile.PE):
56109

57110

58111
def find_function_xrefs(data, start, end):
112+
"""
113+
Finds function cross-references (xrefs) within a specified range in the given binary data.
114+
115+
Args:
116+
data (bytes): The binary data to search for function xrefs.
117+
start (int): The starting address (inclusive) of the range to search.
118+
end (int): The ending address (exclusive) of the range to search.
119+
120+
Returns:
121+
dict: A dictionary where keys are target addresses of CALL instructions and values are lists of addresses
122+
where these CALL instructions are located.
123+
"""
59124
function_xrefs = {}
60125
# The re.finditer function only finds *non-overlapping* matches, which fails to find some CALL instructions
61126
for rva in range(start, end):

lib/cuckoo/common/fraunhofer_helper.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,16 @@
2222

2323

2424
def get_dga_lookup_dict():
25+
"""
26+
Retrieves the DGA (Domain Generation Algorithm) lookup dictionary from a gzipped JSON file.
27+
28+
The function constructs the file path to the DGA lookup dictionary, checks if the file exists,
29+
and if it does, reads and decompresses the file, then loads its contents as a JSON object.
30+
If the file does not exist, it returns an empty dictionary.
31+
32+
Returns:
33+
dict: The DGA lookup dictionary if the file exists, otherwise an empty dictionary.
34+
"""
2535
dga_lookup_path = os.path.join(CUCKOO_ROOT, "data", "dga_lookup_dict.json.gz")
2636
if path_exists(dga_lookup_path):
2737
with gzip.GzipFile(dga_lookup_path, "r") as fin:

lib/cuckoo/common/hypervisor_config.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,25 @@
88

99

1010
def proxmox_shutdown_vm(machineName: str):
11+
"""
12+
Shuts down a virtual machine on a Proxmox server.
13+
14+
Args:
15+
machineName (str): The name of the virtual machine to shut down.
16+
17+
Raises:
18+
Exception: If there is an error during the shutdown process.
19+
20+
Notes:
21+
- This function does not support multiple Proxmox servers.
22+
- The Proxmox server configuration is expected to be available in the `proxmox_conf` object.
23+
- The function retrieves the VM ID from the `proxmox_conf.Node_1` configuration using the provided machine name.
24+
- The function sends a POST request to the Proxmox API to obtain an authentication ticket and CSRF prevention token.
25+
- The function then sends another POST request to shut down the specified virtual machine.
26+
- If the shutdown is successful, a message is printed to indicate success.
27+
- If an error occurs, it is caught and printed.
28+
"""
29+
1130
proxmox_server = proxmox_conf.proxmox.hostname
1231
# Not supporting multiple servers
1332
nodes = proxmox_conf.proxmox.nodes

0 commit comments

Comments
 (0)