From a767940ee940d90486007442709510a1ad718e66 Mon Sep 17 00:00:00 2001 From: Oscar Walter Date: Wed, 25 Sep 2024 08:55:45 +0200 Subject: [PATCH] Fix false positive for multi dep single use statements (#120) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves #91 This also resolves some false detections of `not_my_dep::my_dep_name::stuff_in_my_dep` 😊 I couldn't find a way to avoid detection of `futures` in with the single line regexp ```rust pub use { async_trait::{mod1, dep2}, not_futures::{ futures::{futures_mod1, futures_mod2::{futures_mod21, futures_mod22}} }, reqwest, }; ``` This update does come at a small performance downside (that might be resolved by not instantiating a regexp per dependency?). On our repo of ~500k lines of machete goes from ~45s user time to ~48s of user time. --------- Co-authored-by: Elrendio Co-authored-by: Benjamin Bouvier --- src/search_unused.rs | 116 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 108 insertions(+), 8 deletions(-) diff --git a/src/search_unused.rs b/src/search_unused.rs index c88db1b..0833c31 100644 --- a/src/search_unused.rs +++ b/src/search_unused.rs @@ -79,23 +79,40 @@ fn make_line_regexp(name: &str) -> String { // Breaking down this regular expression: given a line, // - `use (::)?(?i){name}(?-i)(::|;| as)`: matches `use foo;`, `use foo::bar`, `use foo as bar;`, with // an optional "::" in front of the crate's name. - // - `\b(?i){name}(?-i)::`: matches `foo::X`, but not `barfoo::X`. `\b` means word boundary, so - // putting it before the crate's name ensures there's no polluting prefix. + // - `(?:[^:]|^|\W::)\b(?i){name}(?-i)::`: matches `foo::X`, but not `barfoo::X`. To ensure there's no polluting + // prefix we add `(?:[^:]|^|\W::)\b`, meaning that the crate name must be prefixed by either: + // * Not a `:` (therefore not a sub module) + // * The start of a line + // * Not a word character followed by `::` (to allow ::my_crate) // - `extern crate (?i){name}(?-i)( |;)`: matches `extern crate foo`, or `extern crate foo as bar`. // - `(?i){name}(?-i)` makes the match against the crate's name case insensitive format!( - r#"use (::)?(?i){name}(?-i)(::|;| as)|\b(?i){name}(?-i)::|extern crate (?i){name}(?-i)( |;)"# + r#"use (::)?(?i){name}(?-i)(::|;| as)|(?:[^:]|^|\W::)\b(?i){name}(?-i)::|extern crate (?i){name}(?-i)( |;)"# ) } fn make_multiline_regexp(name: &str) -> String { // Syntax documentation: https://docs.rs/regex/latest/regex/#syntax // - // Breaking down this Terrible regular expression: tries to match compound `use as` statements, - // as in `use { X as Y };`, with possibly multiple-lines in between. Will match the first `};` - // that it finds, which *should* be the end of the use statement, but oh well. - // `(?i){name}(?-i)` makes the match against the crate's name case insensitive. - format!(r#"use \{{\s[^;]*(?i){name}(?-i)\s*as\s*[^;]*\}};"#) + // Breaking down this Terrible regular expression: tries to match uses of the crate's name in + // compound `use` statement accross multiple lines. + // + // It's split into 3 parts: + // 1. Matches modules before the usage of the crate's name: `\s*(?:(::)?\w+{sub_modules_match}\s*,\s*)*` + // 2. Matches the crate's name with optional sub-modules: `(::)?{name}{sub_modules_match}\s*` + // 3. Matches modules after the usage of the crate's name: `(?:\s*,\s*(::)?\w+{sub_modules_match})*\s*,?\s*` + // + // In order to avoid false usage detection of `not_my_dep::my_dep` the regexp ensures that the + // crate's name is at the top level of the use statement. However, it's not possible with + // regexp to allow any number of matching `{` and `}` before the crate's usage (rust regexp + // engine doesn't support recursion). Therefore, sub modules are authorized up to 4 levels + // deep. + + let sub_modules_match = r#"(?:::\w+)*(?:::\*|\s+as\s+\w+|::\{(?:[^{}]*(?:\{(?:[^{}]*(?:\{(?:[^{}]*(?:\{[^{}]*\})?[^{}]*)*\})?[^{}]*)*\})?[^{}]*)*\})?"#; + + format!( + r#"use \{{\s*(?:(::)?\w+{sub_modules_match}\s*,\s*)*(::)?{name}{sub_modules_match}\s*(?:\s*,\s*(::)?\w+{sub_modules_match})*\s*,?\s*\}};"# + ) } /// Returns all the paths to the Rust source files for a crate contained at the given path. @@ -666,6 +683,89 @@ pub use futures::future; "# )?); + // multi-dep single use statements + assert!(test_one( + "futures", + r#"pub use {async_trait, futures, reqwest};"# + )?); + + // multi-dep single use statements with :: + assert!(test_one( + "futures", + r#"pub use {async_trait, ::futures, reqwest};"# + )?); + + // No false usage detection of `not_my_dep::my_dep` on compound imports + assert!(!test_one( + "futures", + r#"pub use {async_trait, not_futures::futures, reqwest};"# + )?); + + // No false usage detection of `not_my_dep::my_dep` on multiple lines + assert!(!test_one( + "futures", + r#" +pub use { + async_trait, + not_futures::futures, + reqwest, +};"# + )?); + + // No false usage detection on single line `not_my_dep::my_dep` + assert!(!test_one( + "futures", + r#"use not_futures::futures::stuff_in_futures;"# + )?); + + // multi-dep single use statements with nesting + assert!(test_one( + "futures", + r#"pub use { + async_trait::{mod1, dep2}, + futures::{futures_mod1, futures_mod2::{futures_mod21, futures_mod22}}, + reqwest, + };"# + )?); + + // multi-dep single use statements with star import and renaming + assert!(test_one( + "futures", + r#"pub use { + async_trait::sub_mod::*, + futures as futures_renamed, + reqwest, + };"# + )?); + + // multi-dep single use statements with complex imports and renaming + assert!(test_one( + "futures", + r#"pub use { + other_dep::{ + star_mod::*, + unnamed_import::{UnnamedTrait as _, other_mod}, + renamed_import as new_name, + sub_import::{mod1, mod2}, + }, + futures as futures_renamed, + reqwest, + };"# + )?); + + // No false usage detection of `not_my_dep::my_dep` with nesting + assert!(!test_one( + "futures", + r#"pub use { + async_trait::{mod1, dep2}, + not_futures::futures::{futures_mod1, futures_mod2::{futures_mod21, futures_mod22}}, + reqwest, + };"# + )?); + + // Detects top level usage + assert!(test_one("futures", r#" ::futures::mod1"#)?); + Ok(()) }