Skip to content

Commit a2f86d6

Browse files
authored
Merge pull request #4 from varun-r-mallya/type_system
Type system and strings
2 parents 1517f6e + 0f365be commit a2f86d6

File tree

6 files changed

+110
-28
lines changed

6 files changed

+110
-28
lines changed

examples/execve5.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
class data_t:
1111
pid: c_uint64
1212
ts: c_uint64
13+
comm: str(16)
1314

1415

1516
@bpf
@@ -21,13 +22,14 @@ def events() -> PerfEventArray:
2122
@bpf
2223
@section("tracepoint/syscalls/sys_enter_clone")
2324
def hello(ctx: c_void_p) -> c_int32:
24-
strobj = "Hi"
2525
dataobj = data_t()
2626
ts = ktime()
2727
process_id = pid()
28+
strobj = "hellohellohello"
2829
dataobj.pid = process_id
2930
dataobj.ts = ts
30-
print(f"clone called at {ts} by pid {process_id}")
31+
# dataobj.comm = strobj
32+
print(f"clone called at {ts} by pid {process_id}, comm {strobj}")
3133
events.output(dataobj)
3234
return c_int32(0)
3335

pythonbpf/bpf_helper_handler.py

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def bpf_ktime_get_ns_emitter(call, map_ptr, module, builder, func, local_sym_tab
1313
fn_ptr_type = ir.PointerType(fn_type)
1414
fn_ptr = builder.inttoptr(helper_id, fn_ptr_type)
1515
result = builder.call(fn_ptr, [], tail=False)
16-
return result
16+
return result, ir.IntType(64)
1717

1818

1919
def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
@@ -60,7 +60,7 @@ def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, func, local_sym_
6060

6161
result = builder.call(fn_ptr, [map_void_ptr, key_ptr], tail=False)
6262

63-
return result
63+
return result, ir.PointerType()
6464

6565

6666
def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, local_var_metadata=None):
@@ -75,6 +75,7 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None,
7575
exprs = []
7676

7777
for value in call.args[0].values:
78+
print("Value in f-string:", ast.dump(value))
7879
if isinstance(value, ast.Constant):
7980
if isinstance(value.value, str):
8081
fmt_parts.append(value.value)
@@ -86,10 +87,24 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None,
8687
"Only string and integer constants are supported in f-string.")
8788
elif isinstance(value, ast.FormattedValue):
8889
print("Formatted value:", ast.dump(value))
89-
# Assume int for now
90-
fmt_parts.append("%lld")
90+
# TODO: Dirty handling here, only checks for int or str
9191
if isinstance(value.value, ast.Name):
92-
exprs.append(value.value)
92+
if local_sym_tab and value.value.id in local_sym_tab:
93+
var_ptr, var_type = local_sym_tab[value.value.id]
94+
if isinstance(var_type, ir.IntType):
95+
fmt_parts.append("%lld")
96+
exprs.append(value.value)
97+
elif var_type == ir.PointerType(ir.IntType(8)):
98+
# Case with string
99+
fmt_parts.append("%s")
100+
exprs.append(value.value)
101+
else:
102+
raise NotImplementedError(
103+
"Only integer and pointer types are supported in formatted values.")
104+
print("Formatted value variable:", var_ptr, var_type)
105+
else:
106+
raise ValueError(
107+
f"Variable {value.value.id} not found in local symbol table.")
93108
else:
94109
raise NotImplementedError(
95110
"Only simple variable names are supported in formatted values.")
@@ -121,7 +136,8 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None,
121136
"Warning: bpf_printk supports up to 3 arguments, extra arguments will be ignored.")
122137

123138
for expr in exprs[:3]:
124-
val = eval_expr(func, module, builder, expr, local_sym_tab, None)
139+
val, _ = eval_expr(func, module, builder,
140+
expr, local_sym_tab, None)
125141
if val:
126142
if isinstance(val.type, ir.PointerType):
127143
val = builder.ptrtoint(val, ir.IntType(64))
@@ -137,7 +153,6 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None,
137153
print(
138154
"Warning: Failed to evaluate expression for bpf_printk argument. It will be converted to 0.")
139155
args.append(ir.Constant(ir.IntType(64), 0))
140-
141156
fn_type = ir.FunctionType(ir.IntType(
142157
64), [ir.PointerType(), ir.IntType(32)], var_arg=True)
143158
fn_ptr_type = ir.PointerType(fn_type)
@@ -266,7 +281,7 @@ def bpf_map_update_elem_emitter(call, map_ptr, module, builder, func, local_sym_
266281
result = builder.call(
267282
fn_ptr, [map_void_ptr, key_ptr, value_ptr, flags_const], tail=False)
268283

269-
return result
284+
return result, None
270285

271286

272287
def bpf_map_delete_elem_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
@@ -321,7 +336,7 @@ def bpf_map_delete_elem_emitter(call, map_ptr, module, builder, func, local_sym_
321336
# Call the helper function
322337
result = builder.call(fn_ptr, [map_void_ptr, key_ptr], tail=False)
323338

324-
return result
339+
return result, None
325340

326341

327342
def bpf_get_current_pid_tgid_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, local_var_metadata=None):
@@ -338,7 +353,7 @@ def bpf_get_current_pid_tgid_emitter(call, map_ptr, module, builder, func, local
338353
# Extract the lower 32 bits (PID) using bitwise AND with 0xFFFFFFFF
339354
mask = ir.Constant(ir.IntType(64), 0xFFFFFFFF)
340355
pid = builder.and_(result, mask)
341-
return pid
356+
return pid, ir.IntType(64)
342357

343358

344359
def bpf_perf_event_output_handler(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
@@ -387,7 +402,7 @@ def bpf_perf_event_output_handler(call, map_ptr, module, builder, func, local_sy
387402

388403
result = builder.call(
389404
fn_ptr, [ctx_ptr, map_void_ptr, flags_val, data_void_ptr, size_val], tail=False)
390-
return result
405+
return result, None
391406
else:
392407
raise NotImplementedError(
393408
"Only simple object names are supported as data in perf event output.")

pythonbpf/codegen.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def compile_to_ir(filename: str, output: str):
9393

9494
module.add_named_metadata("llvm.ident", ["llvmlite PythonBPF v0.0.1"])
9595

96+
print(f"IR written to {output}")
9697
with open(output, "w") as f:
9798
f.write(f"source_filename = \"{filename}\"\n")
9899
f.write(str(module))
@@ -118,6 +119,7 @@ def compile():
118119

119120
print(f"Object written to {o_file}, {ll_file} can be removed")
120121

122+
121123
def BPF() -> BpfProgram:
122124
caller_frame = inspect.stack()[1]
123125
caller_file = Path(caller_frame.filename).resolve()
@@ -129,5 +131,5 @@ def BPF() -> BpfProgram:
129131
"llc", "-march=bpf", "-filetype=obj", "-O2",
130132
str(ll_file), "-o", str(o_file)
131133
], check=True)
132-
134+
133135
return BpfProgram(str(o_file))

pythonbpf/expr_pass.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,20 @@
33

44

55
def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab=None, local_var_metadata=None):
6-
print(f"Evaluating expression: {expr}")
6+
print(f"Evaluating expression: {ast.dump(expr)}")
77
if isinstance(expr, ast.Name):
88
if expr.id in local_sym_tab:
99
var = local_sym_tab[expr.id][0]
1010
val = builder.load(var)
11-
return val
11+
return val, local_sym_tab[expr.id][1] # return value and type
1212
else:
1313
print(f"Undefined variable {expr.id}")
1414
return None
1515
elif isinstance(expr, ast.Constant):
1616
if isinstance(expr.value, int):
17-
return ir.Constant(ir.IntType(64), expr.value)
17+
return ir.Constant(ir.IntType(64), expr.value), ir.IntType(64)
1818
elif isinstance(expr.value, bool):
19-
return ir.Constant(ir.IntType(1), int(expr.value))
19+
return ir.Constant(ir.IntType(1), int(expr.value)), ir.IntType(1)
2020
else:
2121
print("Unsupported constant type")
2222
return None
@@ -44,8 +44,9 @@ def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_s
4444
if arg is None:
4545
print("Failed to evaluate deref argument")
4646
return None
47+
# Since we are handling only name case, directly take type from sym tab
4748
val = builder.load(arg)
48-
return val
49+
return val, local_sym_tab[expr.args[0].id][1]
4950

5051
# check for helpers
5152
if expr.func.id in helper_func_list:

pythonbpf/functions_pass.py

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,18 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc
5858
inbounds=True)
5959
val = eval_expr(func, module, builder, rval,
6060
local_sym_tab, map_sym_tab, structs_sym_tab)
61+
if isinstance(struct_info["field_types"][field_idx], ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)):
62+
# TODO: Figure it out, not a priority rn
63+
# Special case for string assignment to char array
64+
#str_len = struct_info["field_types"][field_idx].count
65+
#assign_string_to_array(builder, field_ptr, val[0], str_len)
66+
#print(f"Assigned to struct field {var_name}.{field_name}")
67+
pass
6168
if val is None:
6269
print("Failed to evaluate struct field assignment")
6370
return
64-
builder.store(val, field_ptr)
71+
print(field_ptr)
72+
builder.store(val[0], field_ptr)
6573
print(f"Assigned to struct field {var_name}.{field_name}")
6674
return
6775
elif isinstance(rval, ast.Constant):
@@ -114,7 +122,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc
114122
# var.align = 8
115123
val = handle_helper_call(
116124
rval, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata)
117-
builder.store(val, local_sym_tab[var_name][0])
125+
builder.store(val[0], local_sym_tab[var_name][0])
118126
# local_sym_tab[var_name] = var
119127
print(f"Assigned constant {rval.func.id} to {var_name}")
120128
elif call_type == "deref" and len(rval.args) == 1:
@@ -125,7 +133,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc
125133
print("Failed to evaluate deref argument")
126134
return
127135
print(f"Dereferenced value: {val}, storing in {var_name}")
128-
builder.store(val, local_sym_tab[var_name][0])
136+
builder.store(val[0], local_sym_tab[var_name][0])
129137
# local_sym_tab[var_name] = var
130138
print(f"Dereferenced and assigned to {var_name}")
131139
elif call_type in structs_sym_tab and len(rval.args) == 0:
@@ -155,7 +163,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc
155163
rval, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata)
156164
# var = builder.alloca(ir.IntType(64), name=var_name)
157165
# var.align = 8
158-
builder.store(val, local_sym_tab[var_name][0])
166+
builder.store(val[0], local_sym_tab[var_name][0])
159167
# local_sym_tab[var_name] = var
160168
else:
161169
print("Unsupported assignment call structure")
@@ -196,12 +204,12 @@ def handle_cond(func, module, builder, cond, local_sym_tab, map_sym_tab):
196204
return None
197205
elif isinstance(cond, ast.Compare):
198206
lhs = eval_expr(func, module, builder, cond.left,
199-
local_sym_tab, map_sym_tab)
207+
local_sym_tab, map_sym_tab)[0]
200208
if len(cond.ops) != 1 or len(cond.comparators) != 1:
201209
print("Unsupported complex comparison")
202210
return None
203211
rhs = eval_expr(func, module, builder,
204-
cond.comparators[0], local_sym_tab, map_sym_tab)
212+
cond.comparators[0], local_sym_tab, map_sym_tab)[0]
205213
op = cond.ops[0]
206214

207215
if lhs.type != rhs.type:
@@ -462,7 +470,6 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t
462470

463471
process_func_body(module, builder, func_node, func,
464472
ret_type, map_sym_tab, structs_sym_tab)
465-
466473
return func
467474

468475

@@ -538,3 +545,46 @@ def _expr_type(e):
538545
raise ValueError("Conflicting return types:"
539546
f"{found_type} vs {t}")
540547
return found_type or "None"
548+
549+
# For string assignment to fixed-size arrays
550+
def assign_string_to_array(builder, target_array_ptr, source_string_ptr, array_length):
    """
    Copy a string (i8*) to a fixed-size array ([N x i8]*)

    Emits a byte-by-byte copy loop at the builder's current position and
    leaves the builder positioned in the block that follows the loop.

    Args:
        builder: llvmlite IRBuilder positioned in a block that has no
            terminator yet.
        target_array_ptr: pointer to the destination [N x i8] array.
        source_string_ptr: i8* pointing at the source characters.
        array_length: N, the number of elements in the destination array
            (a Python int known at compile time).

    Raises:
        ValueError: if array_length is smaller than 1, because there would
            be no slot for the terminating NUL byte.

    NOTE(review): the loop copies exactly ``array_length`` bytes and does not
    stop at a NUL in the source, so the source is assumed to be readable for
    that many bytes -- confirm against callers.
    """
    if array_length < 1:
        raise ValueError("array_length must be at least 1")

    i32 = ir.IntType(32)
    zero = ir.Constant(i32, 0)

    # Three separate blocks: every LLVM basic block may end in exactly one
    # terminator, so the bounds check, the copy body and the exit each need
    # their own block.
    cond_block = builder.append_basic_block("copy_cond")
    copy_block = builder.append_basic_block("copy_char")
    end_block = builder.append_basic_block("copy_end")

    # Create loop counter
    i = builder.alloca(i32)
    builder.store(zero, i)

    # Start the loop
    builder.branch(cond_block)

    # Loop condition: continue while idx < array_length
    builder.position_at_end(cond_block)
    idx = builder.load(i)
    in_bounds = builder.icmp_unsigned(
        '<', idx, ir.Constant(i32, array_length))
    builder.cbranch(in_bounds, copy_block, end_block)

    # Copy loop body
    builder.position_at_end(copy_block)

    # Load character from source
    src_ptr = builder.gep(source_string_ptr, [idx])
    char = builder.load(src_ptr)

    # Store character in target; the leading 0 steps through the pointer to
    # the array, idx then selects the element.
    dst_ptr = builder.gep(target_array_ptr, [zero, idx])
    builder.store(char, dst_ptr)

    # Increment counter and jump back to re-evaluate the condition
    next_idx = builder.add(idx, ir.Constant(i32, 1))
    builder.store(next_idx, i)
    builder.branch(cond_block)

    builder.position_at_end(end_block)

    # Ensure null termination by overwriting the last element
    last_idx = ir.Constant(i32, array_length - 1)
    null_ptr = builder.gep(target_array_ptr, [zero, last_idx])
    builder.store(ir.Constant(ir.IntType(8), 0), null_ptr)

pythonbpf/structs_pass.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,25 @@ def process_bpf_struct(cls_node, module):
2828

2929
for item in cls_node.body:
3030
if isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
31+
print(f"Field: {item.target.id}, Type: "
32+
f"{ast.dump(item.annotation)}")
3133
field_names.append(item.target.id)
32-
field_types.append(ctypes_to_ir(item.annotation.id))
34+
if isinstance(item.annotation, ast.Call) and isinstance(item.annotation.func, ast.Name) and item.annotation.func.id == "str":
35+
# This is a char array with fixed length
36+
# TODO: For now assuming str is always called with constant
37+
field_types.append(ir.ArrayType(
38+
ir.IntType(8), item.annotation.args[0].value))
39+
else:
40+
field_types.append(ctypes_to_ir(item.annotation.id))
3341

3442
curr_offset = 0
3543
for ftype in field_types:
3644
if isinstance(ftype, ir.IntType):
3745
fsize = ftype.width // 8
3846
alignment = fsize
47+
elif isinstance(ftype, ir.ArrayType):
48+
fsize = ftype.count * (ftype.element.width // 8)
49+
alignment = ftype.element.width // 8
3950
elif isinstance(ftype, ir.PointerType):
4051
fsize = 8
4152
alignment = 8
@@ -52,6 +63,7 @@ def process_bpf_struct(cls_node, module):
5263
structs_sym_tab[struct_name] = {
5364
"type": struct_type,
5465
"fields": {name: idx for idx, name in enumerate(field_names)},
55-
"size": total_size
66+
"size": total_size,
67+
"field_types": field_types,
5668
}
5769
print(f"Created struct {struct_name} with fields {field_names}")

0 commit comments

Comments
 (0)