From 9f5bfe24eb254fbe3f745062a4c9ce1c76ff800f Mon Sep 17 00:00:00 2001 From: GnomedDev Date: Sat, 21 Sep 2024 12:46:08 +0100 Subject: [PATCH 1/2] Implement lint for regex::Regex compilation inside a loop --- CHANGELOG.md | 1 + clippy_lints/src/declared_lints.rs | 1 + clippy_lints/src/regex.rs | 62 +++++++++++++++++++++++++++++- tests/ui/regex.rs | 28 +++++++++++++- tests/ui/regex.stderr | 52 ++++++++++++++++++++++++- 5 files changed, 141 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e5d1688e4a7..c385405e08a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5873,6 +5873,7 @@ Released 2018-09-13 [`ref_in_deref`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_in_deref [`ref_option_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_option_ref [`ref_patterns`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_patterns +[`regex_creation_in_loops`]: https://rust-lang.github.io/rust-clippy/master/index.html#regex_creation_in_loops [`regex_macro`]: https://rust-lang.github.io/rust-clippy/master/index.html#regex_macro [`renamed_function_params`]: https://rust-lang.github.io/rust-clippy/master/index.html#renamed_function_params [`repeat_once`]: https://rust-lang.github.io/rust-clippy/master/index.html#repeat_once diff --git a/clippy_lints/src/declared_lints.rs b/clippy_lints/src/declared_lints.rs index 16c64830e70d..25a224a47ff7 100644 --- a/clippy_lints/src/declared_lints.rs +++ b/clippy_lints/src/declared_lints.rs @@ -637,6 +637,7 @@ pub static LINTS: &[&crate::LintInfo] = &[ crate::ref_patterns::REF_PATTERNS_INFO, crate::reference::DEREF_ADDROF_INFO, crate::regex::INVALID_REGEX_INFO, + crate::regex::REGEX_CREATION_IN_LOOPS_INFO, crate::regex::TRIVIAL_REGEX_INFO, crate::repeat_vec_with_capacity::REPEAT_VEC_WITH_CAPACITY_INFO, crate::reserve_after_initialization::RESERVE_AFTER_INITIALIZATION_INFO, diff --git a/clippy_lints/src/regex.rs b/clippy_lints/src/regex.rs index f6ef02b7c233..1671737db018 100644 --- a/clippy_lints/src/regex.rs +++ b/clippy_lints/src/regex.rs @@ -55,6 +55,44 @@ declare_clippy_lint! { "trivial regular expressions" } +declare_clippy_lint! { + /// ### What it does + /// + /// Checks for [regex](https://crates.io/crates/regex) compilation inside a loop with a literal. + /// + /// ### Why is this bad? + /// + /// Compiling a regex is a much more expensive operation than using one, and a compiled regex can be used multiple times. + /// This is documented as an antipattern [on the regex documentation](https://docs.rs/regex/latest/regex/#avoid-re-compiling-regexes-especially-in-a-loop) + /// + /// ### Example + /// ```no_run + /// # let haystacks = [""]; + /// # const MY_REGEX: &str = "a.b"; + /// for haystack in haystacks { + /// let regex = regex::Regex::new(MY_REGEX).unwrap(); + /// if regex.is_match(haystack) { + /// // Perform operation + /// } + /// } + /// ``` + /// can be replaced with + /// ```no_run + /// # let haystacks = [""]; + /// # const MY_REGEX: &str = "a.b"; + /// let regex = regex::Regex::new(MY_REGEX).unwrap(); + /// for haystack in haystacks { + /// if regex.is_match(haystack) { + /// // Perform operation + /// } + /// } + /// ``` + #[clippy::version = "1.83.0"] + pub REGEX_CREATION_IN_LOOPS, + perf, + "regular expression compilation performed in a loop" +} + #[derive(Copy, Clone)] enum RegexKind { Unicode, @@ -66,9 +104,10 @@ enum RegexKind { #[derive(Default)] pub struct Regex { definitions: DefIdMap, + loop_stack: Vec, } -impl_lint_pass!(Regex => [INVALID_REGEX, TRIVIAL_REGEX]); +impl_lint_pass!(Regex => [INVALID_REGEX, TRIVIAL_REGEX, REGEX_CREATION_IN_LOOPS]); impl<'tcx> LateLintPass<'tcx> for Regex { fn check_crate(&mut self, cx: &LateContext<'tcx>) { @@ -96,12 +135,33 @@ impl<'tcx> LateLintPass<'tcx> for Regex { && let Some(def_id) = path_def_id(cx, fun) && let Some(regex_kind) = self.definitions.get(&def_id) { + if let Some(&loop_span) = self.loop_stack.last() + && (matches!(arg.kind, ExprKind::Lit(_)) || const_str(cx, arg).is_some()) + { + span_lint_and_help( + cx, + REGEX_CREATION_IN_LOOPS, + fun.span, + "compiling a regex in a loop", + Some(loop_span), + "move the regex construction outside this loop", + ); + } + match regex_kind { RegexKind::Unicode => check_regex(cx, arg, true), RegexKind::UnicodeSet => check_set(cx, arg, true), RegexKind::Bytes => check_regex(cx, arg, false), RegexKind::BytesSet => check_set(cx, arg, false), } + } else if let ExprKind::Loop(_, _, _, span) = expr.kind { + self.loop_stack.push(span); + } + } + + fn check_expr_post(&mut self, _: &LateContext<'tcx>, expr: &'tcx Expr<'_>) { + if matches!(expr.kind, ExprKind::Loop(..)) { + self.loop_stack.pop(); } } } diff --git a/tests/ui/regex.rs b/tests/ui/regex.rs index 4fb6c08bb449..3352239892c5 100644 --- a/tests/ui/regex.rs +++ b/tests/ui/regex.rs @@ -5,7 +5,7 @@ clippy::needless_borrow, clippy::needless_borrows_for_generic_args )] -#![warn(clippy::invalid_regex, clippy::trivial_regex)] +#![warn(clippy::invalid_regex, clippy::trivial_regex, clippy::regex_creation_in_loops)] extern crate regex; @@ -118,7 +118,33 @@ fn trivial_regex() { let _ = BRegex::new(r"\b{start}word\b{end}"); } +fn regex_creation_in_loops() { + loop { + let regex = Regex::new("a.b"); + //~^ ERROR: compiling a regex in a loop + let regex = BRegex::new("a.b"); + //~^ ERROR: compiling a regex in a loop + #[allow(clippy::regex_creation_in_loops)] + let allowed_regex = Regex::new("a.b"); + + if true { + let regex = Regex::new("a.b"); + //~^ ERROR: compiling a regex in a loop + } + + for _ in 0..10 { + let nested_regex = Regex::new("a.b"); + //~^ ERROR: compiling a regex in a loop + } + } + + for i in 0..10 { + let dependant_regex = Regex::new(&format!("{i}")); + } +} + fn main() { syntax_error(); trivial_regex(); + regex_creation_in_loops(); } diff --git a/tests/ui/regex.stderr b/tests/ui/regex.stderr index e936208d8d7b..38e14b163167 100644 --- a/tests/ui/regex.stderr +++ b/tests/ui/regex.stderr @@ -195,5 +195,55 @@ LL | let binary_trivial_empty = BRegex::new("^$"); | = help: consider using `str::is_empty` -error: aborting due to 24 previous errors +error: compiling a regex in a loop + --> tests/ui/regex.rs:123:21 + | +LL | let regex = Regex::new("a.b"); + | ^^^^^^^^^^ + | +help: move the regex construction outside this loop + --> tests/ui/regex.rs:122:5 + | +LL | loop { + | ^^^^ + = note: `-D clippy::regex-creation-in-loops` implied by `-D warnings` + = help: to override `-D warnings` add `#[allow(clippy::regex_creation_in_loops)]` + +error: compiling a regex in a loop + --> tests/ui/regex.rs:125:21 + | +LL | let regex = BRegex::new("a.b"); + | ^^^^^^^^^^^ + | +help: move the regex construction outside this loop + --> tests/ui/regex.rs:122:5 + | +LL | loop { + | ^^^^ + +error: compiling a regex in a loop + --> tests/ui/regex.rs:131:25 + | +LL | let regex = Regex::new("a.b"); + | ^^^^^^^^^^ + | +help: move the regex construction outside this loop + --> tests/ui/regex.rs:122:5 + | +LL | loop { + | ^^^^ + +error: compiling a regex in a loop + --> tests/ui/regex.rs:136:32 + | +LL | let nested_regex = Regex::new("a.b"); + | ^^^^^^^^^^ + | +help: move the regex construction outside this loop + --> tests/ui/regex.rs:135:9 + | +LL | for _ in 0..10 { + | ^^^^^^^^^^^^^^ + +error: aborting due to 28 previous errors From 196dbcf6d5fa474e9dd15a0bbaad1ced3e0f28fe Mon Sep 17 00:00:00 2001 From: GnomedDev Date: Sat, 5 Oct 2024 17:08:33 +0100 Subject: [PATCH 2/2] Avoid linting for Regexes compiled in items defined in loops --- clippy_lints/src/regex.rs | 11 ++++++----- tests/ui/regex.rs | 2 ++ tests/ui/regex.stderr | 10 +++++----- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/clippy_lints/src/regex.rs b/clippy_lints/src/regex.rs index 1671737db018..4c8cd9669414 100644 --- a/clippy_lints/src/regex.rs +++ b/clippy_lints/src/regex.rs @@ -6,7 +6,7 @@ use clippy_utils::source::SpanRangeExt; use clippy_utils::{def_path_def_ids, path_def_id, paths}; use rustc_ast::ast::{LitKind, StrStyle}; use rustc_hir::def_id::DefIdMap; -use rustc_hir::{BorrowKind, Expr, ExprKind}; +use rustc_hir::{BorrowKind, Expr, ExprKind, OwnerId}; use rustc_lint::{LateContext, LateLintPass}; use rustc_session::impl_lint_pass; use rustc_span::{BytePos, Span}; @@ -104,7 +104,7 @@ enum RegexKind { #[derive(Default)] pub struct Regex { definitions: DefIdMap, - loop_stack: Vec, + loop_stack: Vec<(OwnerId, Span)>, } impl_lint_pass!(Regex => [INVALID_REGEX, TRIVIAL_REGEX, REGEX_CREATION_IN_LOOPS]); @@ -135,7 +135,8 @@ impl<'tcx> LateLintPass<'tcx> for Regex { && let Some(def_id) = path_def_id(cx, fun) && let Some(regex_kind) = self.definitions.get(&def_id) { - if let Some(&loop_span) = self.loop_stack.last() + if let Some(&(loop_item_id, loop_span)) = self.loop_stack.last() + && loop_item_id == fun.hir_id.owner && (matches!(arg.kind, ExprKind::Lit(_)) || const_str(cx, arg).is_some()) { span_lint_and_help( @@ -154,8 +155,8 @@ impl<'tcx> LateLintPass<'tcx> for Regex { RegexKind::Bytes => check_regex(cx, arg, false), RegexKind::BytesSet => check_set(cx, arg, false), } - } else if let ExprKind::Loop(_, _, _, span) = expr.kind { - self.loop_stack.push(span); + } else if let ExprKind::Loop(block, _, _, span) = expr.kind { + self.loop_stack.push((block.hir_id.owner, span)); } } diff --git a/tests/ui/regex.rs b/tests/ui/regex.rs index 3352239892c5..f607a2d50c6d 100644 --- a/tests/ui/regex.rs +++ b/tests/ui/regex.rs @@ -120,6 +120,8 @@ fn trivial_regex() { fn regex_creation_in_loops() { loop { + static STATIC_REGEX: std::sync::LazyLock = std::sync::LazyLock::new(|| Regex::new("a.b").unwrap()); + let regex = Regex::new("a.b"); //~^ ERROR: compiling a regex in a loop let regex = BRegex::new("a.b"); diff --git a/tests/ui/regex.stderr b/tests/ui/regex.stderr index 38e14b163167..18dd538c68b4 100644 --- a/tests/ui/regex.stderr +++ b/tests/ui/regex.stderr @@ -196,7 +196,7 @@ LL | let binary_trivial_empty = BRegex::new("^$"); = help: consider using `str::is_empty` error: compiling a regex in a loop - --> tests/ui/regex.rs:123:21 + --> tests/ui/regex.rs:125:21 | LL | let regex = Regex::new("a.b"); | ^^^^^^^^^^ @@ -210,7 +210,7 @@ LL | loop { = help: to override `-D warnings` add `#[allow(clippy::regex_creation_in_loops)]` error: compiling a regex in a loop - --> tests/ui/regex.rs:125:21 + --> tests/ui/regex.rs:127:21 | LL | let regex = BRegex::new("a.b"); | ^^^^^^^^^^^ @@ -222,7 +222,7 @@ LL | loop { | ^^^^ error: compiling a regex in a loop - --> tests/ui/regex.rs:131:25 + --> tests/ui/regex.rs:133:25 | LL | let regex = Regex::new("a.b"); | ^^^^^^^^^^ @@ -234,13 +234,13 @@ LL | loop { | ^^^^ error: compiling a regex in a loop - --> tests/ui/regex.rs:136:32 + --> tests/ui/regex.rs:138:32 | LL | let nested_regex = Regex::new("a.b"); | ^^^^^^^^^^ | help: move the regex construction outside this loop - --> tests/ui/regex.rs:135:9 + --> tests/ui/regex.rs:137:9 | LL | for _ in 0..10 { | ^^^^^^^^^^^^^^