From 5b678aa469fd1c6e008b47859c7e629c9cd8088f Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Wed, 11 Oct 2023 20:42:34 -0400 Subject: [PATCH] pacer/docket_report.py: Explain DATE_REGEX a little better It's not a 100% obvious regexp, especially given the unicode characters in it. --- juriscraper/pacer/docket_report.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/juriscraper/pacer/docket_report.py b/juriscraper/pacer/docket_report.py index 8aef70820..69b4a93da 100644 --- a/juriscraper/pacer/docket_report.py +++ b/juriscraper/pacer/docket_report.py @@ -36,6 +36,9 @@ class BaseDocketReport: little class as a mixin with the common components. """ + # A date is one or more characters that are members of the class + # of emdashes (U+2014), digits, ASCII dashes (U+002D), endashes + # (U+2013), and ASCII slashes. DATE_REGEX = r"[—\d\-–/]+" date_entered_regex = re.compile(r"Entered:\s+(%s)" % DATE_REGEX)