Skip to content

Commit f0f5b0a

Browse files
committed
fix: issue in the hoisting algorithm causing false positives in some edge cases.
When a `with` statement declares some variable `foo`, and `foo` is used in the right side of the declaration of some other variable `bar` within the same statement, the hoisting algorithm was not realizing that `bar` depends on `foo`.
1 parent cf33b43 commit f0f5b0a

File tree

6 files changed

+124
-10
lines changed

6 files changed

+124
-10
lines changed

lib/src/compiler/ir/dfs.rs

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ pub(crate) enum EventContext {
2222
Body,
2323
/// The current expression is a children of a field access expression.
2424
FieldAccess,
25+
/// The current expression is one of the expressions that initializes a
26+
/// variable in a `with` statement. For instance, the `<expr>` in:
27+
/// ```text
28+
/// with some_var = <expr> : ( .. )
29+
/// ```
30+
WithDeclaration,
2531
}
2632

2733
/// An iterator that conducts a Depth First Search (DFS) traversal of the IR
@@ -154,12 +160,12 @@ impl<'a> DFSWithScopeIter<'a> {
154160
/// corresponding to the `for` statement.
155161
///
156162
/// Let's see a more complex example, consider the following YARA
157-
/// expression, where we are positioned at `<inner expr>`:
163+
/// expression, where we are positioned at `<for body expr>`:
158164
///
159165
/// ```text
160-
/// with a = <expr> : (
166+
/// with a = <init expr> : (
161167
/// for any i in (0..10) : (
162-
/// <inner expr>
168+
/// <for body expr>
163169
/// )
164170
/// )
165171
/// ```
@@ -168,6 +174,9 @@ impl<'a> DFSWithScopeIter<'a> {
168174
/// statement first and then the [`ExprId`] corresponding to the `for`
169175
/// statement. The iterator processes the scopes starting from the outermost
170176
/// scope and progresses inward.
177+
///
178+
/// If we are positioned at `<init expr>`, the iterator returns the
179+
/// [`ExprId`] that corresponds to the `with` statement.
171180
pub fn scopes(&self) -> impl DoubleEndedIterator<Item = ExprId> + '_ {
172181
self.scopes.iter().cloned()
173182
}
@@ -192,15 +201,21 @@ impl Iterator for DFSWithScopeIter<'_> {
192201
}
193202
let next = match self.dfs.next()? {
194203
Event::Enter((expr_id, _, ctx)) => {
195-
if matches!(ctx, EventContext::Body) {
204+
if matches!(
205+
ctx,
206+
EventContext::Body | EventContext::WithDeclaration
207+
) {
196208
// If the current expression is the body of some other
197209
// expression, the current expression must have a parent.
198210
self.scopes.push(self.ir.get_parent(expr_id).unwrap());
199211
}
200212
Event::Enter((expr_id, ctx))
201213
}
202214
Event::Leave((expr_id, _, ctx)) => {
203-
if matches!(ctx, EventContext::Body) {
215+
if matches!(
216+
ctx,
217+
EventContext::Body | EventContext::WithDeclaration
218+
) {
204219
// Don't remove the scope at top of the stack right away.
205220
// If the user calls `scopes()` while processing the Leave
206221
// event, we want the current context to be there. We just
@@ -383,7 +398,8 @@ pub(super) fn dfs_common(
383398
Expr::With(with) => {
384399
stack.push(Event::Enter((with.body, EventContext::Body)));
385400
for (_id, expr) in with.declarations.iter().rev() {
386-
stack.push(Event::Enter((*expr, EventContext::None)))
401+
stack
402+
.push(Event::Enter((*expr, EventContext::WithDeclaration)))
387403
}
388404
}
389405
}
@@ -612,17 +628,17 @@ mod tests {
612628

613629
assert!(matches!(
614630
dfs.next(),
615-
Some(Event::Enter((expr_id, EventContext::None))) if expr_id == const_1
631+
Some(Event::Enter((expr_id, EventContext::WithDeclaration))) if expr_id == const_1
616632
));
617633

618-
assert_eq!(dfs.scopes().collect::<Vec<_>>(), vec![]);
634+
assert_eq!(dfs.scopes().collect::<Vec<_>>(), vec![with]);
619635

620636
assert!(matches!(
621637
dfs.next(),
622-
Some(Event::Leave((expr_id, EventContext::None))) if expr_id == const_1
638+
Some(Event::Leave((expr_id, EventContext::WithDeclaration))) if expr_id == const_1
623639
));
624640

625-
assert_eq!(dfs.scopes().collect::<Vec<_>>(), vec![]);
641+
assert_eq!(dfs.scopes().collect::<Vec<_>>(), vec![with]);
626642

627643
assert!(matches!(
628644
dfs.next(),
@@ -642,5 +658,7 @@ mod tests {
642658
dfs.next(),
643659
Some(Event::Leave((expr_id, EventContext::None))) if expr_id == with
644660
));
661+
662+
assert_eq!(dfs.scopes().collect::<Vec<_>>(), vec![]);
645663
}
646664
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
RULE test
2+
13: FOR_IN -- hash: 0xfc4f8d81a95824cd -- parent: None
3+
0: CONST integer(1) -- parent: 13
4+
1: CONST integer(2) -- parent: 13
5+
12: WITH -- hash: 0x2fa6e67ac30fd47a -- parent: 13
6+
6: FN_CALL uint32@i@i:R0:4294967295u -- hash: 0xb24700199f93dab8 -- parent: 12
7+
5: ADD -- hash: 0x685e7a2ba5a36f1d -- parent: 6
8+
3: PATTERN_OFFSET PatternIdx(0) INDEX -- hash: 0x7cb6e22690df7f1b -- parent: 5
9+
2: SYMBOL Var { var: Var { frame_id: 1, ty: integer, index: 5 }, type_value: integer(unknown) } -- parent: 3
10+
4: CONST integer(16) -- parent: 5
11+
8: FN_CALL uint32be@i@i:R0:4294967295u -- hash: 0x36dbf5f6d8719ced -- parent: 12
12+
7: SYMBOL Var { var: Var { frame_id: 2, ty: integer, index: 7 }, type_value: integer(unknown) } -- parent: 8
13+
11: NE -- hash: 0xe0b33818592dcf55 -- parent: 12
14+
9: SYMBOL Var { var: Var { frame_id: 2, ty: integer, index: 8 }, type_value: integer(unknown) } -- parent: 11
15+
10: CONST integer(1347092738) -- parent: 11
16+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
RULE test
2+
13: FOR_IN -- hash: 0xfc4f8d81a95824cd -- parent: None
3+
0: CONST integer(1) -- parent: 13
4+
1: CONST integer(2) -- parent: 13
5+
12: WITH -- hash: 0x2fa6e67ac30fd47a -- parent: 13
6+
6: FN_CALL uint32@i@i:R0:4294967295u -- hash: 0xb24700199f93dab8 -- parent: 12
7+
5: ADD -- hash: 0x685e7a2ba5a36f1d -- parent: 6
8+
3: PATTERN_OFFSET PatternIdx(0) INDEX -- hash: 0x7cb6e22690df7f1b -- parent: 5
9+
2: SYMBOL Var { var: Var { frame_id: 1, ty: integer, index: 5 }, type_value: integer(unknown) } -- parent: 3
10+
4: CONST integer(16) -- parent: 5
11+
8: FN_CALL uint32be@i@i:R0:4294967295u -- hash: 0x36dbf5f6d8719ced -- parent: 12
12+
7: SYMBOL Var { var: Var { frame_id: 2, ty: integer, index: 7 }, type_value: integer(unknown) } -- parent: 8
13+
11: NE -- hash: 0xe0b33818592dcf55 -- parent: 12
14+
9: SYMBOL Var { var: Var { frame_id: 2, ty: integer, index: 8 }, type_value: integer(unknown) } -- parent: 11
15+
10: CONST integer(1347092738) -- parent: 11
16+
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
rule test {
2+
strings:
3+
$a = { 50 4B 05 06 }
4+
condition:
5+
for any i in (1..10): (
6+
with
7+
offset = uint32(@a[i] + 16),
8+
value = uint32be(offset): (
9+
value != 0x504b0102
10+
)
11+
)
12+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
RULE test
2+
13: FOR_IN -- hash: 0xfc4f8d81a95824cd -- parent: None
3+
0: CONST integer(1) -- parent: 13
4+
1: CONST integer(2) -- parent: 13
5+
12: WITH -- hash: 0x2fa6e67ac30fd47a -- parent: 13
6+
6: FN_CALL uint32@i@i:R0:4294967295u -- hash: 0xb24700199f93dab8 -- parent: 12
7+
5: ADD -- hash: 0x685e7a2ba5a36f1d -- parent: 6
8+
3: PATTERN_OFFSET PatternIdx(0) INDEX -- hash: 0x7cb6e22690df7f1b -- parent: 5
9+
2: SYMBOL Var { var: Var { frame_id: 1, ty: integer, index: 5 }, type_value: integer(unknown) } -- parent: 3
10+
4: CONST integer(16) -- parent: 5
11+
8: FN_CALL uint32be@i@i:R0:4294967295u -- hash: 0x36dbf5f6d8719ced -- parent: 12
12+
7: SYMBOL Var { var: Var { frame_id: 2, ty: integer, index: 7 }, type_value: integer(unknown) } -- parent: 8
13+
11: NE -- hash: 0xe0b33818592dcf55 -- parent: 12
14+
9: SYMBOL Var { var: Var { frame_id: 2, ty: integer, index: 8 }, type_value: integer(unknown) } -- parent: 11
15+
10: CONST integer(1347092738) -- parent: 11
16+

lib/src/tests/mod.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,42 @@ fn with() {
727727
)
728728
"#
729729
);
730+
731+
#[cfg(feature = "test_proto2-module")]
732+
condition_true!(
733+
r#"with one = test_proto2.int32_one,
734+
two = test_proto2.add(one, 1),
735+
three = two + 1,
736+
four = 4: (
737+
738+
with seven = three + four: (
739+
seven == 7
740+
)
741+
)
742+
"#
743+
);
744+
745+
#[cfg(feature = "test_proto2-module")]
746+
condition_false!(
747+
r#"with undef_1 = uint32(100000),
748+
undef_2 = uint32(undef_1),
749+
one = 1: (
750+
undef_2 != one
751+
)
752+
"#
753+
);
754+
755+
#[cfg(feature = "test_proto2-module")]
756+
condition_false!(
757+
r#"for any i in (1..2): (
758+
with undef_1 = uint32(100000),
759+
undef_2 = uint32(undef_1) + i,
760+
one = i: (
761+
undef_2 != one
762+
)
763+
)
764+
"#
765+
);
730766
}
731767

732768
#[test]

0 commit comments

Comments
 (0)