diff --git a/Directory.Build.props b/Directory.Build.props new file mode 100644 index 0000000..ed14ea7 --- /dev/null +++ b/Directory.Build.props @@ -0,0 +1,5 @@ + + + true + + diff --git a/README.md b/README.md index 1ab8df7..764f9b0 100644 --- a/README.md +++ b/README.md @@ -97,9 +97,39 @@ We use optimizations that prevent quadratic behavior in scenarios like the patte matching against the text `aaaaaaaaaaaaaaa...aaaa...aaa`. Similarly, for the `a/**/a/**/a/**/.../a/**/a/**/a/**/b` pattern matching against `a/a/a/a/.../a/.../a`. +## File traversal + +The `Files` class provides functionality for traversing the file system and retrieving lists of files and directories based on specified glob patterns. This allows for flexible and efficient file and directory enumeration. + +```csharp +using Ramstack.Globbing.Traversal; + +// List all *.cs files +var files = Files.EnumerateFiles(@"/path/to/directory", "**/*.cs"); +foreach (var file in files) + Console.WriteLine(file); + +// List all *.cs files except those in the tests directory +var files = Files.EnumerateFiles(@"/path/to/directory", "**/*.cs", "tests"); +foreach (var file in files) + Console.WriteLine(file); +``` +Support for multiple patterns is also included: + +```csharp +using Ramstack.Globbing.Traversal; + +// List all *.cs files under the src and lib directories, except those in tests directories +var files = Files.EnumerateFiles(@"/path/to/directory", ["src/**/*.cs", "lib/**/*.cs"], ["**/tests"]); +foreach (var file in files) + Console.WriteLine(file); +``` + ## Changelog ### 2.0.0 +* Added the ability to retrieve a list of files and directories based on a specified glob pattern. + **BREAKING CHANGE** To improve code readability and adherence to .NET conventions, the order of parameters in the `IsMatch` method has been changed. 
diff --git a/Ramstack.Globbing/Matcher.cs b/Ramstack.Globbing/Matcher.cs index 06628af..c5c71b2 100644 --- a/Ramstack.Globbing/Matcher.cs +++ b/Ramstack.Globbing/Matcher.cs @@ -6,13 +6,54 @@ namespace Ramstack.Globbing; /// /// Provides functionality for shell-style glob matching using the glob pattern syntax. -/// Supported meta-characters include '*', '?', '\' and '[', ']'. And inside character classes '-', '!' and ']'. -/// The '.' and '..' symbols do not have any special treatment and are processed as regular characters for matching. -/// Character classes can be negated by prefixing them with '!', such as [!0-9], which matches all characters except digits. -/// Brace patterns are supported, including nested brace pattern: {file,dir,name}, {file-1.{c,cpp},file-2.{cs,f}} -/// An empty pattern in brace expansion {} is allowed, as well as variations like {.cs,}, {name,,file}, or {,.cs}. -/// Leading and trailing separators are ignored, and consecutive delimiters are counted as one. /// +/// +/// +/// +/// +/// Supported meta-characters include '*', '?', '\' and '[', ']'. +/// And inside character classes '-', '!' and ']'. +/// +/// +/// +/// +/// The '.' and '..' symbols do not have any special treatment and are processed +/// as regular characters for matching. +/// +/// +/// +/// +/// Character classes can be negated by prefixing them with '!', such as [!0-9], +/// which matches all characters except digits. +/// +/// +/// +/// +/// Brace patterns are supported, including nested brace pattern: +/// {file,dir,name}, {file-1.{c,cpp},file-2.{cs,f}} +/// +/// +/// +/// +/// An empty pattern in brace expansion {} is allowed, as well as variations +/// like {.cs,}, {name,,file}, or {,.cs}. +/// +/// +/// +/// Leading and trailing separators are ignored. +/// +/// +/// Consecutive separators are counted as one. +/// +/// +/// +/// The '**' sequence in the glob pattern can be used to match zero or more directories and subdirectories. 
+/// It can be used at the beginning, middle, or end of a pattern, for example,
+/// "**/file.txt", "dir/**/*.txt", "dir/**".
+///
+///
+///
+///
 public static unsafe class Matcher
 {
     ///
diff --git a/Ramstack.Globbing/Traversal/Files.cs b/Ramstack.Globbing/Traversal/Files.cs
new file mode 100644
index 0000000..44cdbc3
--- /dev/null
+++ b/Ramstack.Globbing/Traversal/Files.cs
@@ -0,0 +1,470 @@
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Ramstack.Globbing.Traversal;
+
+/// <summary>
+/// Provides methods for enumerating files and directories based on glob patterns and optional exclusions.
+/// </summary>
+public static class Files
+{
+    /// <summary>
+    /// Enumerates files in a directory that match the specified glob pattern.
+    /// </summary>
+    /// <param name="path">The relative or absolute path to the directory to search.</param>
+    /// <param name="pattern">The glob pattern to match against the names of files.</param>
+    /// <param name="exclude">Optional glob pattern to exclude files.</param>
+    /// <param name="flags">The matching options to use. Default is <see cref="MatchFlags.Auto"/>.</param>
+    /// <returns>
+    /// An enumerable collection of the full names for the files in the directory specified by
+    /// <paramref name="path"/> and that match the specified glob pattern.
+    /// </returns>
+    /// <remarks>
+    /// Glob pattern:
+    /// <list type="bullet">
+    ///   <item><description>
+    ///     Supported meta-characters include '*', '?', '\' and '[', ']'.
+    ///     And inside character classes '-', '!' and ']'.
+    ///   </description></item>
+    ///   <item><description>
+    ///     The '.' and '..' symbols do not have any special treatment and are processed
+    ///     as regular characters for matching.
+    ///   </description></item>
+    ///   <item><description>
+    ///     Character classes can be negated by prefixing them with '!', such as <c>[!0-9]</c>,
+    ///     which matches all characters except digits.
+    ///   </description></item>
+    ///   <item><description>
+    ///     Brace patterns are supported, including nested brace pattern:
+    ///     <c>{file,dir,name}</c>, <c>{file-1.{c,cpp},file-2.{cs,f}}</c>
+    ///   </description></item>
+    ///   <item><description>
+    ///     An empty pattern in brace expansion <c>{}</c> is allowed, as well as variations
+    ///     like <c>{.cs,}</c>, <c>{name,,file}</c>, or <c>{,.cs}</c>.
+    ///   </description></item>
+    ///   <item><description>Leading and trailing separators are ignored.</description></item>
+    ///   <item><description>Consecutive separators are counted as one.</description></item>
+    ///   <item><description>
+    ///     The '**' sequence in the glob pattern can be used to match zero or more directories and subdirectories.
+    ///     It can be used at the beginning, middle, or end of a pattern, for example,
+    ///     <c>"**/file.txt"</c>, <c>"dir/**/*.txt"</c>, <c>"dir/**"</c>.
+    ///   </description></item>
+    /// </list>
+    /// </remarks>
+    public static IEnumerable<string> EnumerateFiles(string path, string pattern, string? exclude = null, MatchFlags flags = MatchFlags.Auto) =>
+        EnumerateEntries(path, [pattern], ToExcludes(exclude), flags, SearchTarget.Files, depth: 0);
+
+    /// <summary>
+    /// Enumerates files in a directory that match any of the specified glob patterns.
+    /// </summary>
+    /// <param name="path">The relative or absolute path to the directory to search.</param>
+    /// <param name="patterns">An array of glob patterns to match against the names of files.</param>
+    /// <param name="excludes">Optional array of glob patterns to exclude files.</param>
+    /// <param name="flags">The matching options to use. Default is <see cref="MatchFlags.Auto"/>.</param>
+    /// <returns>
+    /// An enumerable collection of the full names for the files in the directory specified by
+    /// <paramref name="path"/> and that match the specified glob patterns.
+    /// </returns>
+    /// <remarks>
+    /// Glob pattern:
+    /// <list type="bullet">
+    ///   <item><description>
+    ///     Supported meta-characters include '*', '?', '\' and '[', ']'.
+    ///     And inside character classes '-', '!' and ']'.
+    ///   </description></item>
+    ///   <item><description>
+    ///     The '.' and '..' symbols do not have any special treatment and are processed
+    ///     as regular characters for matching.
+    ///   </description></item>
+    ///   <item><description>
+    ///     Character classes can be negated by prefixing them with '!', such as <c>[!0-9]</c>,
+    ///     which matches all characters except digits.
+    ///   </description></item>
+    ///   <item><description>
+    ///     Brace patterns are supported, including nested brace pattern:
+    ///     <c>{file,dir,name}</c>, <c>{file-1.{c,cpp},file-2.{cs,f}}</c>
+    ///   </description></item>
+    ///   <item><description>
+    ///     An empty pattern in brace expansion <c>{}</c> is allowed, as well as variations
+    ///     like <c>{.cs,}</c>, <c>{name,,file}</c>, or <c>{,.cs}</c>.
+    ///   </description></item>
+    ///   <item><description>Leading and trailing separators are ignored.</description></item>
+    ///   <item><description>Consecutive separators are counted as one.</description></item>
+    ///   <item><description>
+    ///     The '**' sequence in the glob pattern can be used to match zero or more directories and subdirectories.
+    ///     It can be used at the beginning, middle, or end of a pattern, for example,
+    ///     <c>"**/file.txt"</c>, <c>"dir/**/*.txt"</c>, <c>"dir/**"</c>.
+    ///   </description></item>
+    /// </list>
+    /// </remarks>
+    public static IEnumerable<string> EnumerateFiles(string path, string[] patterns, string[]? excludes = null, MatchFlags flags = MatchFlags.Auto) =>
+        EnumerateEntries(path, patterns, excludes ?? [], flags, SearchTarget.Files, depth: 0);
+
+    /// <summary>
+    /// Enumerates directories in a directory that match the specified glob pattern.
+    /// </summary>
+    /// <param name="path">The relative or absolute path to the directory to search.</param>
+    /// <param name="pattern">The glob pattern to match against the names of directories.</param>
+    /// <param name="exclude">Optional glob pattern to exclude directories.</param>
+    /// <param name="flags">The matching options to use. Default is <see cref="MatchFlags.Auto"/>.</param>
+    /// <returns>
+    /// An enumerable collection of the full names for the directories in the directory specified by
+    /// <paramref name="path"/> and that match the specified glob pattern.
+    /// </returns>
+    /// <remarks>
+    /// Glob pattern:
+    /// <list type="bullet">
+    ///   <item><description>
+    ///     Supported meta-characters include '*', '?', '\' and '[', ']'.
+    ///     And inside character classes '-', '!' and ']'.
+    ///   </description></item>
+    ///   <item><description>
+    ///     The '.' and '..' symbols do not have any special treatment and are processed
+    ///     as regular characters for matching.
+    ///   </description></item>
+    ///   <item><description>
+    ///     Character classes can be negated by prefixing them with '!', such as <c>[!0-9]</c>,
+    ///     which matches all characters except digits.
+    ///   </description></item>
+    ///   <item><description>
+    ///     Brace patterns are supported, including nested brace pattern:
+    ///     <c>{file,dir,name}</c>, <c>{file-1.{c,cpp},file-2.{cs,f}}</c>
+    ///   </description></item>
+    ///   <item><description>
+    ///     An empty pattern in brace expansion <c>{}</c> is allowed, as well as variations
+    ///     like <c>{.cs,}</c>, <c>{name,,file}</c>, or <c>{,.cs}</c>.
+    ///   </description></item>
+    ///   <item><description>Leading and trailing separators are ignored.</description></item>
+    ///   <item><description>Consecutive separators are counted as one.</description></item>
+    ///   <item><description>
+    ///     The '**' sequence in the glob pattern can be used to match zero or more directories and subdirectories.
+    ///     It can be used at the beginning, middle, or end of a pattern, for example,
+    ///     <c>"**/file.txt"</c>, <c>"dir/**/*.txt"</c>, <c>"dir/**"</c>.
+    ///   </description></item>
+    /// </list>
+    /// </remarks>
+    public static IEnumerable<string> EnumerateDirectories(string path, string pattern, string? exclude = null, MatchFlags flags = MatchFlags.Auto) =>
+        EnumerateEntries(path, [pattern], ToExcludes(exclude), flags, SearchTarget.Directories, depth: 0);
+
+    /// <summary>
+    /// Enumerates directories in a directory that match any of the specified glob patterns.
+    /// </summary>
+    /// <param name="path">The relative or absolute path to the directory to search.</param>
+    /// <param name="patterns">An array of glob patterns to match against the names of directories.</param>
+    /// <param name="excludes">Optional array of glob patterns to exclude directories.</param>
+    /// <param name="flags">The matching options to use. Default is <see cref="MatchFlags.Auto"/>.</param>
+    /// <returns>
+    /// An enumerable collection of the full names for the directories in the directory specified by
+    /// <paramref name="path"/> and that match the specified glob patterns.
+    /// </returns>
+    /// <remarks>
+    /// Glob pattern:
+    /// <list type="bullet">
+    ///   <item><description>
+    ///     Supported meta-characters include '*', '?', '\' and '[', ']'.
+    ///     And inside character classes '-', '!' and ']'.
+    ///   </description></item>
+    ///   <item><description>
+    ///     The '.' and '..' symbols do not have any special treatment and are processed
+    ///     as regular characters for matching.
+    ///   </description></item>
+    ///   <item><description>
+    ///     Character classes can be negated by prefixing them with '!', such as <c>[!0-9]</c>,
+    ///     which matches all characters except digits.
+    ///   </description></item>
+    ///   <item><description>
+    ///     Brace patterns are supported, including nested brace pattern:
+    ///     <c>{file,dir,name}</c>, <c>{file-1.{c,cpp},file-2.{cs,f}}</c>
+    ///   </description></item>
+    ///   <item><description>
+    ///     An empty pattern in brace expansion <c>{}</c> is allowed, as well as variations
+    ///     like <c>{.cs,}</c>, <c>{name,,file}</c>, or <c>{,.cs}</c>.
+    ///   </description></item>
+    ///   <item><description>Leading and trailing separators are ignored.</description></item>
+    ///   <item><description>Consecutive separators are counted as one.</description></item>
+    ///   <item><description>
+    ///     The '**' sequence in the glob pattern can be used to match zero or more directories and subdirectories.
+    ///     It can be used at the beginning, middle, or end of a pattern, for example,
+    ///     <c>"**/file.txt"</c>, <c>"dir/**/*.txt"</c>, <c>"dir/**"</c>.
+    ///   </description></item>
+    /// </list>
+    /// </remarks>
+    public static IEnumerable<string> EnumerateDirectories(string path, string[] patterns, string[]? excludes = null, MatchFlags flags = MatchFlags.Auto) =>
+        EnumerateEntries(path, patterns, excludes ?? [], flags, SearchTarget.Directories, depth: 0);
+
+    /// <summary>
+    /// Returns an enumerable collection of file names and directory names that match a search pattern in a specified path.
+    /// </summary>
+    /// <param name="path">The relative or absolute path to the directory to search.</param>
+    /// <param name="pattern">The glob pattern to match against the names of file-system entries in path.</param>
+    /// <param name="exclude">Optional glob pattern to exclude file-system entries.</param>
+    /// <param name="flags">The matching options to use. Default is <see cref="MatchFlags.Auto"/>.</param>
+    /// <returns>
+    /// An enumerable collection of the full names for the file-system entries in the directory specified by
+    /// <paramref name="path"/> and that match the specified glob pattern.
+    /// </returns>
+    /// <remarks>
+    /// Glob pattern:
+    /// <list type="bullet">
+    ///   <item><description>
+    ///     Supported meta-characters include '*', '?', '\' and '[', ']'.
+    ///     And inside character classes '-', '!' and ']'.
+    ///   </description></item>
+    ///   <item><description>
+    ///     The '.' and '..' symbols do not have any special treatment and are processed
+    ///     as regular characters for matching.
+    ///   </description></item>
+    ///   <item><description>
+    ///     Character classes can be negated by prefixing them with '!', such as <c>[!0-9]</c>,
+    ///     which matches all characters except digits.
+    ///   </description></item>
+    ///   <item><description>
+    ///     Brace patterns are supported, including nested brace pattern:
+    ///     <c>{file,dir,name}</c>, <c>{file-1.{c,cpp},file-2.{cs,f}}</c>
+    ///   </description></item>
+    ///   <item><description>
+    ///     An empty pattern in brace expansion <c>{}</c> is allowed, as well as variations
+    ///     like <c>{.cs,}</c>, <c>{name,,file}</c>, or <c>{,.cs}</c>.
+    ///   </description></item>
+    ///   <item><description>Leading and trailing separators are ignored.</description></item>
+    ///   <item><description>Consecutive separators are counted as one.</description></item>
+    ///   <item><description>
+    ///     The '**' sequence in the glob pattern can be used to match zero or more directories and subdirectories.
+    ///     It can be used at the beginning, middle, or end of a pattern, for example,
+    ///     <c>"**/file.txt"</c>, <c>"dir/**/*.txt"</c>, <c>"dir/**"</c>.
+    ///   </description></item>
+    /// </list>
+    /// </remarks>
+    public static IEnumerable<string> EnumerateFileSystemEntries(string path, string pattern, string? exclude = null, MatchFlags flags = MatchFlags.Auto) =>
+        EnumerateEntries(path, [pattern], ToExcludes(exclude), flags, SearchTarget.Both, depth: 0);
+
+    /// <summary>
+    /// Enumerates file-system entries (files and directories) in a directory that match any of the specified glob patterns.
+    /// </summary>
+    /// <param name="path">The relative or absolute path to the directory to search.</param>
+    /// <param name="patterns">An array of glob patterns to match against the names of file-system entries.</param>
+    /// <param name="excludes">Optional array of glob patterns to exclude file-system entries.</param>
+    /// <param name="flags">The matching options to use. Default is <see cref="MatchFlags.Auto"/>.</param>
+    /// <returns>
+    /// An enumerable collection of the full names for the file-system entries in the directory specified by
+    /// <paramref name="path"/> and that match the specified glob patterns.
+    /// </returns>
+    /// <remarks>
+    /// Glob pattern:
+    /// <list type="bullet">
+    ///   <item><description>
+    ///     Supported meta-characters include '*', '?', '\' and '[', ']'.
+    ///     And inside character classes '-', '!' and ']'.
+    ///   </description></item>
+    ///   <item><description>
+    ///     The '.' and '..' symbols do not have any special treatment and are processed
+    ///     as regular characters for matching.
+    ///   </description></item>
+    ///   <item><description>
+    ///     Character classes can be negated by prefixing them with '!', such as <c>[!0-9]</c>,
+    ///     which matches all characters except digits.
+    ///   </description></item>
+    ///   <item><description>
+    ///     Brace patterns are supported, including nested brace pattern:
+    ///     <c>{file,dir,name}</c>, <c>{file-1.{c,cpp},file-2.{cs,f}}</c>
+    ///   </description></item>
+    ///   <item><description>
+    ///     An empty pattern in brace expansion <c>{}</c> is allowed, as well as variations
+    ///     like <c>{.cs,}</c>, <c>{name,,file}</c>, or <c>{,.cs}</c>.
+    ///   </description></item>
+    ///   <item><description>Leading and trailing separators are ignored.</description></item>
+    ///   <item><description>Consecutive separators are counted as one.</description></item>
+    ///   <item><description>
+    ///     The '**' sequence in the glob pattern can be used to match zero or more directories and subdirectories.
+    ///     It can be used at the beginning, middle, or end of a pattern, for example,
+    ///     <c>"**/file.txt"</c>, <c>"dir/**/*.txt"</c>, <c>"dir/**"</c>.
+    ///   </description></item>
+    /// </list>
+    /// </remarks>
+    public static IEnumerable<string> EnumerateFileSystemEntries(string path, string[] patterns, string[]? excludes = null, MatchFlags flags = MatchFlags.Auto) =>
+        EnumerateEntries(path, patterns, excludes ?? [], flags, SearchTarget.Both, depth: 0);
+
+    /// <summary>
+    /// Resolves <paramref name="path"/> to a full path and starts the lazy traversal of that directory.
+    /// </summary>
+    /// <param name="path">The relative or absolute path to the directory to search.</param>
+    /// <param name="patterns">The glob patterns to match entries against.</param>
+    /// <param name="excludes">The glob patterns of entries to leave out, together with everything beneath them.</param>
+    /// <param name="flags">The matching options to use.</param>
+    /// <param name="target">The kind of file-system entries to return.</param>
+    /// <param name="depth">The nesting level of <paramref name="path"/> relative to the search root; callers pass 0.</param>
+    /// <returns>A lazily evaluated sequence of the full names of the matching entries.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="patterns"/> is <see langword="null"/>.</exception>
+    private static IEnumerable<string> EnumerateEntries(string path, string[] patterns, string[] excludes, MatchFlags flags, SearchTarget target, int depth)
+    {
+        // Validate eagerly: the traversal itself is an iterator and would otherwise
+        // fail only once the caller starts enumerating the results.
+        if (patterns is null)
+            throw new ArgumentNullException(nameof(patterns));
+
+        path = Path.GetFullPath(path);
+        return EnumerateEntriesRecursive(path, path, patterns, excludes, flags, target, depth);
+    }
+
+    /// <summary>
+    /// Walks <paramref name="directory"/> depth-first and yields the entries that match.
+    /// </summary>
+    /// <param name="basePath">The full path of the search root; entries are matched by their path relative to it.</param>
+    /// <param name="directory">The full path of the directory to list.</param>
+    /// <param name="patterns">The glob patterns to match entries against.</param>
+    /// <param name="excludes">The glob patterns of entries to leave out, together with everything beneath them.</param>
+    /// <param name="flags">The matching options to use.</param>
+    /// <param name="target">The kind of file-system entries to return.</param>
+    /// <param name="depth">The nesting level of <paramref name="directory"/> relative to <paramref name="basePath"/>.</param>
+    /// <returns>A lazily evaluated sequence of the full names of the matching entries.</returns>
+    private static IEnumerable<string> EnumerateEntriesRecursive(string basePath, string directory, string[] patterns, string[] excludes, MatchFlags flags, SearchTarget target, int depth)
+    {
+        foreach (var entry in Directory.EnumerateFileSystemEntries(directory))
+        {
+            var current = Directory.Exists(entry)
+                ? SearchTarget.Directories
+                : SearchTarget.Files;
+
+            var requested = (target & current) != 0;
+
+            // A file that was not asked for can neither be returned nor descended into.
+            if (!requested && current == SearchTarget.Files)
+                continue;
+
+            // Patterns are matched against the part of the path that follows the search root.
+            var relativePath = entry.Substring(basePath.Length);
+
+            // An excluded entry is never returned, and an excluded directory is not descended into.
+            if (IsLeafMatch(relativePath, excludes, flags))
+                continue;
+
+            if (requested && IsLeafMatch(relativePath, patterns, flags))
+                yield return entry;
+
+            if (current != SearchTarget.Directories)
+                continue;
+
+            // Descend only when a pattern could still match something below this directory.
+            if (!IsPartialMatch(relativePath, patterns, flags, depth))
+                continue;
+
+            foreach (var nested in EnumerateEntriesRecursive(basePath, entry, patterns, excludes, flags, target, depth + 1))
+                yield return nested;
+        }
+    }
+
+    /// <summary>
+    /// Determines whether <paramref name="fullName"/> matches at least one of the specified patterns.
+    /// </summary>
+    /// <param name="fullName">The path to test.</param>
+    /// <param name="patterns">The glob patterns to test the path against.</param>
+    /// <param name="flags">The matching options to use.</param>
+    /// <returns><see langword="true"/> if any pattern matches; otherwise, <see langword="false"/>.</returns>
+    private static bool IsLeafMatch(string fullName, string[] patterns, MatchFlags flags)
+    {
+        foreach (var pattern in patterns)
+            if (Matcher.IsMatch(fullName, pattern, flags))
+                return true;
+
+        return false;
+    }
+
+    /// <summary>
+    /// Determines whether the directory <paramref name="path"/> can contain entries matching at least one
+    /// of the specified patterns, by matching it against the leading part of each pattern.
+    /// </summary>
+    /// <param name="path">The path of the directory to test.</param>
+    /// <param name="patterns">The glob patterns to test the path against.</param>
+    /// <param name="flags">The matching options to use.</param>
+    /// <param name="depth">The nesting level of the directory that contains <paramref name="path"/>.</param>
+    /// <returns><see langword="true"/> if the directory is worth descending into; otherwise, <see langword="false"/>.</returns>
+    private static bool IsPartialMatch(string path, string[] patterns, MatchFlags flags, int depth)
+    {
+        foreach (var pattern in patterns)
+            if (Matcher.IsMatch(path, GetPartialPattern(pattern, depth), flags))
+                return true;
+
+        return false;
+    }
+
+    /// <summary>
+    /// Returns the leading part of <paramref name="pattern"/> that applies to a directory found
+    /// <paramref name="depth"/> levels below the search root.
+    /// </summary>
+    /// <param name="pattern">The glob pattern to take the leading segments from.</param>
+    /// <param name="depth">The number of segments to take in addition to the first one.</param>
+    /// <returns>
+    /// The first <c>depth + 1</c> segments of the pattern without leading separators, the whole pattern
+    /// when it has fewer segments, or the pattern up to and including its first <c>**</c> segment,
+    /// whichever is shorter.
+    /// </returns>
+    /// <remarks>
+    /// Only '/' is treated as a segment separator.
+    /// NOTE(review): a '/' inside a brace expression, such as <c>{a/b,c}</c>, is split as well - confirm
+    /// whether such patterns have to be supported here.
+    /// </remarks>
+    private static ReadOnlySpan<char> GetPartialPattern(string pattern, int depth)
+    {
+        var span = pattern.AsSpan().TrimStart('/');
+        var start = 0;
+
+        while (true)
+        {
+            var offset = span.Slice(start).IndexOf('/');
+            var end = offset < 0 ? span.Length : start + offset;
+
+            // A '**' segment matches any number of nested directories,
+            // so nothing that follows it can narrow the traversal.
+            if (span.Slice(start, end - start) is "**")
+                return span.Slice(0, end);
+
+            if (depth == 0)
+                return span.Slice(0, end);
+
+            // Consecutive separators are counted as one.
+            start = end;
+            while (start < span.Length && span[start] == '/')
+                start++;
+
+            // The pattern ends here, possibly with trailing separators.
+            if (start == span.Length)
+                return span.Slice(0, end);
+
+            depth--;
+        }
+    }
+
+    /// <summary>
+    /// Wraps an optional exclusion pattern into an array.
+    /// </summary>
+    /// <param name="exclude">The glob pattern to wrap, or <see langword="null"/> when nothing is excluded.</param>
+    /// <returns>A single-element array holding <paramref name="exclude"/>, or an empty array if it is <see langword="null"/>.</returns>
+    private static string[] ToExcludes(string? exclude) =>
+        exclude is not null ? [exclude] : [];
+
+    #region Inner type: SearchTarget
+
+    /// <summary>
+    /// Specifies the search targets for enumeration.
+    /// </summary>
+    [Flags]
+    private enum SearchTarget
+    {
+        /// <summary>
+        /// Search for files only.
+        /// </summary>
+        Files = 1,
+
+        /// <summary>
+        /// Search for directories only.
+        /// </summary>
+        Directories = 2,
+
+        /// <summary>
+        /// Search for both files and directories.
+        /// </summary>
+        Both = Files | Directories
+    }
+
+    #endregion
+}