Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RegexDiff X64] MihaZupan/runtime/regex-contains #977

Open
MihuBot opened this issue Feb 1, 2025 · 1 comment
Open

[RegexDiff X64] MihaZupan/runtime/regex-contains #977

MihuBot opened this issue Feb 1, 2025 · 1 comment

Comments

@MihuBot
Copy link
Owner

MihuBot commented Feb 1, 2025

Job completed in 15 minutes 45 seconds (remote runner delay: 1 minute 23 seconds).

Using arguments: regexdiff -NoPRLink

118 out of 18857 patterns have generated source code changes.

Examples of GeneratedRegex source diffs
"^[a-f0-9]{32}$" (4920 uses)
[GeneratedRegex("^[a-f0-9]{32}$")]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 32).IndexOfAnyExcept(Utilities.s_asciiHexDigitsLower) >= 0)
+   if (slice.Slice(0, 32).ContainsAnyExcept(Utilities.s_asciiHexDigitsLower))
  {
      return false; // The input didn't match.
  }
"\"([a-fA-F0-9-\\{\\}]{36})\"" (569 uses)
[GeneratedRegex("\"([a-fA-F0-9-\\{\\}]{36})\"", RegexOptions.CultureInvariant)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 36).IndexOfAnyExcept(Utilities.s_ascii_20FF037E0000007E000028) >= 0)
+   if (slice.Slice(0, 36).ContainsAnyExcept(Utilities.s_ascii_20FF037E0000007E000028))
  {
      UncaptureUntil(0);
      return false; // The input didn't match.
"^[a-z0-9]{24}$" (285 uses)
[GeneratedRegex("^[a-z0-9]{24}$", RegexOptions.IgnoreCase)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 24).IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndKelvinSign) >= 0)
+   if (slice.Slice(0, 24).ContainsAnyExcept(Utilities.s_asciiLettersAndDigitsAndKelvinSign))
  {
      return false; // The input didn't match.
  }
"^[0-9a-f]{40}$" (202 uses)
[GeneratedRegex("^[0-9a-f]{40}$", RegexOptions.IgnoreCase)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 40).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+   if (slice.Slice(0, 40).ContainsAnyExcept(Utilities.s_asciiHexDigits))
  {
      return false; // The input didn't match.
  }
"\\A(?:[A-Z0-9]{17})\\z" (182 uses)
[GeneratedRegex("\\A(?:[A-Z0-9]{17})\\z")]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 17).IndexOfAnyExcept(Utilities.s_asciiLettersUpperAndDigits) >= 0)
+   if (slice.Slice(0, 17).ContainsAnyExcept(Utilities.s_asciiLettersUpperAndDigits))
  {
      return false; // The input didn't match.
  }
"^\\\\((?<StoreLocation>CurrentUser|LocalMach ..." (167 uses)
[GeneratedRegex("^\\\\((?<StoreLocation>CurrentUser|LocalMachine)(\\\\(?<StoreName>[a-zA-Z]+)(\\\\(?<Thumbprint>[0-9a-f]{40}))?)?)?$")]
      goto LoopIterationNoMatch2;
  }
  
-   if (slice.Slice(0, 40).IndexOfAnyExcept(Utilities.s_asciiHexDigitsLower) >= 0)
+   if (slice.Slice(0, 40).ContainsAnyExcept(Utilities.s_asciiHexDigitsLower))
  {
      goto LoopIterationNoMatch2;
  }
"IR[0-9]{24}" (144 uses)
[GeneratedRegex("IR[0-9]{24}", RegexOptions.IgnoreCase)]
  // Match a character in the set [0-9] exactly 24 times.
  {
-       if (slice.Slice(2, 24).IndexOfAnyExceptInRange('0', '9') >= 0)
+       if (slice.Slice(2, 24).ContainsAnyExceptInRange('0', '9'))
      {
          return false; // The input didn't match.
      }
"^committed\\s+changeset\\s+\\d+:(?<hash>[0-9 ..." (132 uses)
[GeneratedRegex("^committed\\s+changeset\\s+\\d+:(?<hash>[0-9a-f]{40})$", RegexOptions.IgnoreCase)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 40).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+   if (slice.Slice(0, 40).ContainsAnyExcept(Utilities.s_asciiHexDigits))
  {
      UncaptureUntil(0);
      return false; // The input didn't match.
"^[A-Fa-f0-9]{32}$|^({|\\()?[A-Fa-f0-9]{8}-([ ..." (130 uses)
[GeneratedRegex("^[A-Fa-f0-9]{32}$|^({|\\()?[A-Fa-f0-9]{8}-([A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}(}|\\))?$|^({)?[0xA-Fa-f0-9]{3,10}(, {0,1}[0xA-Fa-f0-9]{3,6}){2}, {0,1}({)([0xA-Fa-f0-9]{3,4}, {0,1}){7}[0xA-Fa-f0-9]{3,4}(}})$")]
      goto AlternationBranch;
  }
  
-   if (slice.Slice(0, 32).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+   if (slice.Slice(0, 32).ContainsAnyExcept(Utilities.s_asciiHexDigits))
  {
      goto AlternationBranch;
  }
"asmz://(?<guid>[0-9a-fA-F]{32})/(?<size>[0-9 ..." (99 uses)
[GeneratedRegex("asmz://(?<guid>[0-9a-fA-F]{32})/(?<size>[0-9]+)(/(?<flags>[a-zA-Z0-9]*))?", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 32).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+   if (slice.Slice(0, 32).ContainsAnyExcept(Utilities.s_asciiHexDigits))
  {
      UncaptureUntil(0);
      return false; // The input didn't match.

For more diff examples, see https://gist.github.com/MihuBot/6a5ceda1e682d042a2b554ec0cf98464

Total bytes of base: 53924200
Total bytes of diff: 53924088
Total bytes of delta: -112 (-0.00 % of base)
Total relative delta: -0.02
    diff is an improvement.
    relative diff is an improvement.
Sample source code for further analysis
// Downloads the results archive once, caches the extracted JSON under a relative path
// (resolved against the current working directory), and reuses that copy on later runs.
const string JsonPath = "RegexResults-977.json";
if (!File.Exists(JsonPath))
{
    // Own the HttpClient explicitly so it (and its handler) is disposed after the download,
    // instead of leaking an anonymous instance.
    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://mihubot.xyz/r/EogniYtA");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);

    // First(...) throws InvalidOperationException if the archive has no "Results.json" entry.
    archive.Entries.First(e => e.Name == "Results.json").ExtractToFile(JsonPath);
}

// IncludeFields is set because RegexEntry has tuple-typed members, whose elements are public fields.
using FileStream jsonFileStream = File.OpenRead(JsonPath);
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })
    // Deserialize returns null for a JSON "null" document; fail with a clear message rather than
    // a NullReferenceException on the next line.
    ?? throw new InvalidDataException($"'{JsonPath}' deserialized to null instead of an array of regex entries.");
Console.WriteLine($"Working with {entries.Length} patterns");



/// <summary>
/// A regex pattern together with the options it is constructed with.
/// </summary>
/// <param name="Pattern">The regular expression pattern text.</param>
/// <param name="Options">The <see cref="RegexOptions"/> associated with the pattern.</param>
/// <param name="Count">An occurrence count for the pattern (presumably the number of known uses; confirm against the data producer).</param>
record KnownPattern(string Pattern, RegexOptions Options, int Count);

/// <summary>
/// One element of the results JSON array: a known pattern plus source/diff text associated with it.
/// </summary>
sealed class RegexEntry
{
    /// <summary>The pattern (with options and count) this entry describes.</summary>
    public required KnownPattern Regex { get; set; }

    /// <summary>Source text for the baseline (presumably the generated code from the main branch; confirm).</summary>
    public required string MainSource { get; set; }

    /// <summary>Source text for the change under test (presumably the generated code from the PR branch; confirm).</summary>
    public required string PrSource { get; set; }

    /// <summary>Optional full diff text; may be <see langword="null"/>.</summary>
    public string? FullDiff { get; set; }

    /// <summary>Optional shortened diff text; may be <see langword="null"/>.</summary>
    public string? ShortDiff { get; set; }

    /// <summary>Optional list of (name, values) pairs; the tuple elements are fields, hence IncludeFields when deserializing.</summary>
    public (string Name, string Values)[]? SearchValuesOfChar { get; set; }

    /// <summary>Optional list of (string values, comparison type) pairs; the tuple elements are fields, hence IncludeFields when deserializing.</summary>
    public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
}

Artifacts:

@MihuBot
Copy link
Owner Author

MihuBot commented Feb 1, 2025

@MihaZupan

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant