From a978447f233c4d284bb0817565f897a447f6534d Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Thu, 17 May 2018 21:18:13 -0400 Subject: [PATCH 01/15] rss_feeds.py: data() docstring Attempt to explain why we override this function. I...actually don't understand why we do, so this explanation may be wrong. It seems like we could have fit within the BaseDocketReport framework of returning our information as `docket_entries` rather than `data` but maybe I'm missing something. --- juriscraper/pacer/rss_feeds.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index f7124cc25..ae6471727 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -98,8 +98,8 @@ def _parse_text(self, text): @property def data(self): - """Override this to create a list of docket-like objects instead of the - usual dict that is usually provided by the docket report. + """Return a list of docket-like objects instead of the usual dict that + is usually provided by the BaseDocketReport superclass. """ if self._data is not None: return self._data From 3b60e8fbd67c49b04c978273a59f90e6c6efd555 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Wed, 16 May 2018 08:52:45 -0400 Subject: [PATCH 02/15] rss_feeds: Merge adjacent qualifying entries rather than overwriting --- juriscraper/pacer/rss_feeds.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index ae6471727..1745736a2 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -100,17 +100,48 @@ def _parse_text(self, text): def data(self): """Return a list of docket-like objects instead of the usual dict that is usually provided by the BaseDocketReport superclass. 
+ + When CMECF generates the RSS feed, it breaks up items with + multiple consecutive entries into multiple RSS items with + identical timestamp/id/title. We reverse that and recombine + those items. """ if self._data is not None: return self._data data_list = [] + prevdata = None + preventry = None for entry in self.feed.entries: data = self.metadata(entry) + + de = self.docket_entries(entry) + # If this entry and the immediately prior entry match + # in metadata, then add the current description to + # the previous entry's and continue the loop. + if ( + preventry and + prevdata[u'docket_entries'] and + entry.title == preventry.title and + entry.link == preventry.link and + entry.id == preventry.id and + entry.published == preventry.published and + len(de) > 0 # xxx + ): + # xxx we rely on the fact that there's only ever one + # item in this array, which is true but flawed + prevdata['docket_entries'][0][u'short_description'] += ( + ' AND ' + de[0][u'short_description']) + continue + data[u'parties'] = None data[u'docket_entries'] = self.docket_entries(entry) - if data[u'docket_entries'] and data['docket_number']: + if data[u'docket_entries'] and data[u'docket_number']: data_list.append(data) + + preventry = entry + prevdata = data + self._data = data_list return data_list From 49cbaf21226498f7d5bca3f5f417f99b4133b753 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Wed, 16 May 2018 09:21:00 -0400 Subject: [PATCH 03/15] rss_feeds: use twin_entries() instead of stack of a==b --- juriscraper/pacer/rss_feeds.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index 1745736a2..0621cd5b8 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -109,6 +109,11 @@ def data(self): if self._data is not None: return self._data + def twin_entries(a, b): + fields = ['title', 'link', 'id', 'published'] + matching_fields = (a[f] == b[f] for f in fields) + 
return all(matching_fields) + data_list = [] prevdata = None preventry = None @@ -122,10 +127,7 @@ def data(self): if ( preventry and prevdata[u'docket_entries'] and - entry.title == preventry.title and - entry.link == preventry.link and - entry.id == preventry.id and - entry.published == preventry.published and + twin_entries(entry, preventry) and len(de) > 0 # xxx ): # xxx we rely on the fact that there's only ever one From 1a1c476d46f4c30973c3f8f031c06489205ed33c Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Wed, 16 May 2018 11:16:05 -0400 Subject: [PATCH 04/15] Use previous_and_next() Snazzier than setting an old= var at the end of the loop. --- juriscraper/pacer/rss_feeds.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index 0621cd5b8..1457c2078 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -12,6 +12,7 @@ from ..lib.html_utils import html_unescape from ..lib.log_tools import make_default_logger from ..lib.string_utils import harmonize, clean_string +from ..lib.utils import previous_and_next logger = make_default_logger() @@ -115,12 +116,11 @@ def twin_entries(a, b): return all(matching_fields) data_list = [] - prevdata = None - preventry = None - for entry in self.feed.entries: + for preventry, entry, nextentry in previous_and_next(self.feed.entries): data = self.metadata(entry) de = self.docket_entries(entry) + prevdata = data_list[-1] if len(data_list) else None # If this entry and the immediately prior entry match # in metadata, then add the current description to # the previous entry's and continue the loop. 
@@ -141,9 +141,6 @@ def twin_entries(a, b): if data[u'docket_entries'] and data[u'docket_number']: data_list.append(data) - preventry = entry - prevdata = data - self._data = data_list return data_list From 358bf2d17c77e4ba5de69f74babbbc08b46eed4f Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Thu, 17 May 2018 20:17:29 -0400 Subject: [PATCH 05/15] Bye bye twin_entries(), you're not readable --- juriscraper/pacer/rss_feeds.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index 1457c2078..7f91ab90c 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -110,11 +110,6 @@ def data(self): if self._data is not None: return self._data - def twin_entries(a, b): - fields = ['title', 'link', 'id', 'published'] - matching_fields = (a[f] == b[f] for f in fields) - return all(matching_fields) - data_list = [] for preventry, entry, nextentry in previous_and_next(self.feed.entries): data = self.metadata(entry) @@ -125,10 +120,13 @@ def twin_entries(a, b): # in metadata, then add the current description to # the previous entry's and continue the loop. 
if ( - preventry and - prevdata[u'docket_entries'] and - twin_entries(entry, preventry) and - len(de) > 0 # xxx + preventry + and prevdata[u'docket_entries'] + and entry.title == preventry.title + and entry.link == preventry.link + and entry.id == preventry.id + and entry.published == preventry.published + and len(de) > 0 # xxx ): # xxx we rely on the fact that there's only ever one # item in this array, which is true but flawed From d59e0072c5a4a539444cfdf1634a372c53901d26 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Wed, 16 May 2018 12:15:31 -0400 Subject: [PATCH 06/15] prevdata->lastdata, preventry -> previous, de -> data[de] prevdata -> lastdata (well, not really) preventry -> previous_entry Eliminate `de` and just use data['docket_entries'] which we moved up --- juriscraper/pacer/rss_feeds.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index 7f91ab90c..ec7f196d3 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -111,27 +111,27 @@ def data(self): return self._data data_list = [] - for preventry, entry, nextentry in previous_and_next(self.feed.entries): + for previous_entry, entry, next_entry in previous_and_next( + self.feed.entries): data = self.metadata(entry) - de = self.docket_entries(entry) - prevdata = data_list[-1] if len(data_list) else None + data[u'docket_entries'] = self.docket_entries(entry) # If this entry and the immediately prior entry match # in metadata, then add the current description to # the previous entry's and continue the loop. 
if ( - preventry - and prevdata[u'docket_entries'] - and entry.title == preventry.title - and entry.link == preventry.link - and entry.id == preventry.id - and entry.published == preventry.published - and len(de) > 0 # xxx + previous_entry and data_list[-1] + and data_list[-1][u'docket_entries'] + and entry.title == previous_entry.title + and entry.link == previous_entry.link + and entry.id == previous_entry.id + and entry.published == previous_entry.published + and len(data['docket_entries') > 0 # xxx ): # xxx we rely on the fact that there's only ever one # item in this array, which is true but flawed - prevdata['docket_entries'][0][u'short_description'] += ( - ' AND ' + de[0][u'short_description']) + data_list[-1][u'docket_entries'][0][u'short_description'] += ( + ' AND ' + data[u'docket_entries'][0][u'short_description']) continue data[u'parties'] = None From e7ace3b8a171c7e509c9ab2f9bcede94a3b60290 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Thu, 17 May 2018 20:29:33 -0400 Subject: [PATCH 07/15] len(x) -> x --- juriscraper/pacer/rss_feeds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index ec7f196d3..2b1fab3c4 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -126,7 +126,7 @@ def data(self): and entry.link == previous_entry.link and entry.id == previous_entry.id and entry.published == previous_entry.published - and len(data['docket_entries') > 0 # xxx + and data['docket_entries'] ): # xxx we rely on the fact that there's only ever one # item in this array, which is true but flawed From 05d707862790abb530d69b4fdc0b7f6b908c8126 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Thu, 17 May 2018 21:04:28 -0400 Subject: [PATCH 08/15] Remove xxx, add assert() invariant --- juriscraper/pacer/rss_feeds.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/juriscraper/pacer/rss_feeds.py 
b/juriscraper/pacer/rss_feeds.py index 2b1fab3c4..7fd00a29e 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -115,7 +115,15 @@ def data(self): self.feed.entries): data = self.metadata(entry) + # We are guaranteed to only have a single docket entry for each + # RSS item, and thus we use data['docket_entries'][0] below. + # Coming up with an alternative data representation here and + # then transforming it into what CL expects after we're done + # iterating over the list is just not worth the bother. data[u'docket_entries'] = self.docket_entries(entry) + # BUT: Guarantee this condition persists into the future: + assert len(data[u'docket_entries']) <= 1 + # If this entry and the immediately prior entry match # in metadata, then add the current description to # the previous entry's and continue the loop. @@ -128,8 +136,6 @@ def data(self): and entry.published == previous_entry.published and data['docket_entries'] ): - # xxx we rely on the fact that there's only ever one - # item in this array, which is true but flawed data_list[-1][u'docket_entries'][0][u'short_description'] += ( ' AND ' + data[u'docket_entries'][0][u'short_description']) continue From 5ea25af9d35f9a490d17c71bee71f8cbdd594c29 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Thu, 17 May 2018 21:06:09 -0400 Subject: [PATCH 09/15] linebreak on AND for clarity --- juriscraper/pacer/rss_feeds.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index 7fd00a29e..6df19ad62 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -137,7 +137,8 @@ def data(self): and data['docket_entries'] ): data_list[-1][u'docket_entries'][0][u'short_description'] += ( - ' AND ' + data[u'docket_entries'][0][u'short_description']) + ' AND ' + + data[u'docket_entries'][0][u'short_description']) continue data[u'parties'] = None From 84d6050ee73e807ac000032ba2a1284be701628e Mon Sep 17 
00:00:00 2001 From: John Hawkinson Date: Thu, 17 May 2018 21:22:53 -0400 Subject: [PATCH 10/15] reorder if protases for sense --- juriscraper/pacer/rss_feeds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index 6df19ad62..18266ca12 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -130,11 +130,11 @@ def data(self): if ( previous_entry and data_list[-1] and data_list[-1][u'docket_entries'] + and data[u'docket_entries'] and entry.title == previous_entry.title and entry.link == previous_entry.link and entry.id == previous_entry.id and entry.published == previous_entry.published - and data['docket_entries'] ): data_list[-1][u'docket_entries'][0][u'short_description'] += ( ' AND ' + From a6a9c0883c52bdc5334a99c4b5bea4f133dfc06c Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Thu, 17 May 2018 23:32:13 -0400 Subject: [PATCH 11/15] Check data_list instead --- juriscraper/pacer/rss_feeds.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index 18266ca12..94a11b250 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -128,8 +128,7 @@ def data(self): # in metadata, then add the current description to # the previous entry's and continue the loop. if ( - previous_entry and data_list[-1] - and data_list[-1][u'docket_entries'] + data_list and data_list[-1][u'docket_entries'] and data[u'docket_entries'] and entry.title == previous_entry.title and entry.link == previous_entry.link From b43395c7be19a13e03cd9fa242a2629fe22bb7ac Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Fri, 18 May 2018 00:21:08 -0400 Subject: [PATCH 12/15] rss_feeds.py/docket_entries(): u'' not '' Because otherwise tests break. I think there may be a platform issue here, but this fix is obviously correct, so I'm not worried about masking it. 
--- juriscraper/pacer/rss_feeds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index 94a11b250..bf81859ec 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -180,7 +180,7 @@ def docket_entries(self, entry): u'date_filed': date(*entry.published_parsed[:3]), u'document_number': self._get_value(self.document_number_regex, entry.summary), - u'description': '', + u'description': u'', u'short_description': html_unescape( self._get_value(self.short_desc_regex, entry.summary)), } From d3d7526a364d68525c10f18c0c3fc7c26a8c9a62 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Fri, 18 May 2018 00:21:22 -0400 Subject: [PATCH 13/15] tests/.../rss_feeds: Update for multi-entry merging. --- tests/examples/pacer/rss_feeds/nysb_1.json | 29 +- tests/examples/pacer/rss_feeds/sdny_1.json | 493 +-------------------- 2 files changed, 18 insertions(+), 504 deletions(-) diff --git a/tests/examples/pacer/rss_feeds/nysb_1.json b/tests/examples/pacer/rss_feeds/nysb_1.json index 52881b7f9..63e55179e 100644 --- a/tests/examples/pacer/rss_feeds/nysb_1.json +++ b/tests/examples/pacer/rss_feeds/nysb_1.json @@ -717,34 +717,7 @@ "description": "", "document_number": "47", "pacer_doc_id": "126018830304", - "short_description": "Motion, Redact (Fee) (NOT to be used for redacting in Transcripts)" - } - ], - "docket_number": "16-35015", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "263474", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Angela S. 
Bittencourt", - "cause": "", - "court_id": "nysb", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-19", - "description": "", - "document_number": "47", - "pacer_doc_id": "126018830304", - "short_description": "Motion, Redact (Fee) (NOT to be used for redacting in Transcripts)" + "short_description": "Motion, Redact (Fee) (NOT to be used for redacting in Transcripts) AND Motion, Redact (Fee) (NOT to be used for redacting in Transcripts)" } ], "docket_number": "16-35015", diff --git a/tests/examples/pacer/rss_feeds/sdny_1.json b/tests/examples/pacer/rss_feeds/sdny_1.json index bd92cabd6..37d02ddc5 100644 --- a/tests/examples/pacer/rss_feeds/sdny_1.json +++ b/tests/examples/pacer/rss_feeds/sdny_1.json @@ -2499,34 +2499,7 @@ "description": "", "document_number": "73", "pacer_doc_id": "127022264019", - "short_description": "~Util - Add and Terminate Attorneys" - } - ], - "docket_number": "7:16-cv-02451", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "455612", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Smythe v. 
City of Yonkers", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "73", - "pacer_doc_id": "127022264019", - "short_description": "Stipulation and Order" + "short_description": "~Util - Add and Terminate Attorneys AND Stipulation and Order" } ], "docket_number": "7:16-cv-02451", @@ -2931,34 +2904,7 @@ "description": "", "document_number": "29", "pacer_doc_id": "127022263966", - "short_description": "~Util - Set Deadlines" - } - ], - "docket_number": "7:17-cv-06053", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "478821", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Hatches v. Cipollini", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "29", - "pacer_doc_id": "127022263966", - "short_description": "Order" + "short_description": "~Util - Set Deadlines AND Order" } ], "docket_number": "7:17-cv-06053", @@ -3282,34 +3228,7 @@ "description": "", "document_number": "62", "pacer_doc_id": "127022263926", - "short_description": "~Util - Set Deadlines" - } - ], - "docket_number": "7:17-cv-05440", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "477713", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Drayton v. 
Young", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "62", - "pacer_doc_id": "127022263926", - "short_description": "Memo Endorsement" + "short_description": "~Util - Set Deadlines AND Memo Endorsement" } ], "docket_number": "7:17-cv-05440", @@ -3606,34 +3525,7 @@ "description": "", "document_number": "23", "pacer_doc_id": "127022263887", - "short_description": "~Util - Set Hearings" - } - ], - "docket_number": "1:18-cv-02786", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "491087", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Best v. Layne", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "23", - "pacer_doc_id": "127022263887", - "short_description": "Order" + "short_description": "~Util - Set Hearings AND Order" } ], "docket_number": "1:18-cv-02786", @@ -4821,34 +4713,7 @@ "description": "", "document_number": "337", "pacer_doc_id": "127022263733", - "short_description": "~Util - Set Hearings" - } - ], - "docket_number": "1:16-md-02742", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "463632", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "In re: SunEdison, Inc., Securities Litigation", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "337", - "pacer_doc_id": "127022263733", - 
"short_description": "Order" + "short_description": "~Util - Set Hearings AND Order" } ], "docket_number": "1:16-md-02742", @@ -5226,34 +5091,7 @@ "description": "", "document_number": "13", "pacer_doc_id": "127022263695", - "short_description": "~Util - Add and Terminate Parties" - } - ], - "docket_number": "1:18-cv-02441", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "490430", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Borges v. Municipal Credit Union", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "13", - "pacer_doc_id": "127022263695", - "short_description": "Notice of Voluntary Dismissal - Signed" + "short_description": "~Util - Add and Terminate Parties AND Notice of Voluntary Dismissal - Signed" } ], "docket_number": "1:18-cv-02441", @@ -5550,34 +5388,7 @@ "description": "", "document_number": "27", "pacer_doc_id": "127022263655", - "short_description": "~Util - Set Deadlines" - } - ], - "docket_number": "1:17-cv-09841", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "485500", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Benedetto v. 
209 Grub LLC", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "27", - "pacer_doc_id": "127022263655", - "short_description": "Order" + "short_description": "~Util - Set Deadlines AND Order" } ], "docket_number": "1:17-cv-09841", @@ -5631,34 +5442,7 @@ "description": "", "document_number": "15", "pacer_doc_id": "127022263645", - "short_description": "~Util - Set Deadlines" - } - ], - "docket_number": "1:17-cv-10085", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "485885", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Aude v. Kobe Steel, Ltd.", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "15", - "pacer_doc_id": "127022263645", - "short_description": "Stipulation and Order" + "short_description": "~Util - Set Deadlines AND Stipulation and Order" } ], "docket_number": "1:17-cv-10085", @@ -5820,34 +5604,7 @@ "description": "", "document_number": "42", "pacer_doc_id": "127022263619", - "short_description": "Discovery" - } - ], - "docket_number": "1:17-cv-06954", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "480431", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Thomas v. 
River Greene Construction Group LLC", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "42", - "pacer_doc_id": "127022263619", - "short_description": "Compel" + "short_description": "Discovery AND Compel" } ], "docket_number": "1:17-cv-06954", @@ -6117,34 +5874,7 @@ "description": "", "document_number": "10", "pacer_doc_id": "127022263587", - "short_description": "1 - Terminate Hearings" - } - ], - "docket_number": "1:18-cv-00069", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "486234", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Herbst v. Best Buy Co. Inc.", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "10", - "pacer_doc_id": "127022263587", - "short_description": "Order" + "short_description": "1 - Terminate Hearings AND Order" } ], "docket_number": "1:18-cv-00069", @@ -6468,34 +6198,7 @@ "description": "", "document_number": "240", "pacer_doc_id": "127022263533", - "short_description": "~Util - Set Deadlines/Hearings" - } - ], - "docket_number": "1:10-cv-09545", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "403622", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Baines v. 
The City of New York", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "240", - "pacer_doc_id": "127022263533", - "short_description": "Order" + "short_description": "~Util - Set Deadlines/Hearings AND Order" } ], "docket_number": "1:10-cv-09545", @@ -6981,34 +6684,7 @@ "description": "", "document_number": "8", "pacer_doc_id": "127022263460", - "short_description": "~Util - Set Deadlines/Hearings" - } - ], - "docket_number": "1:18-cv-01932", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "489634", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Olsen v. Macaron Cafe, LLC", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "8", - "pacer_doc_id": "127022263460", - "short_description": "Memo Endorsement" + "short_description": "~Util - Set Deadlines/Hearings AND Memo Endorsement" } ], "docket_number": "1:18-cv-01932", @@ -7332,34 +7008,7 @@ "description": "", "document_number": "63", "pacer_doc_id": "127022263420", - "short_description": "~Util - Set Motion and R&R Deadlines/Hearings" - } - ], - "docket_number": "1:17-cv-04974", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "477077", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Krondes v. 
Nationstar Mortgage, LLC", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "63", - "pacer_doc_id": "127022263420", - "short_description": "Memo Endorsement" + "short_description": "~Util - Set Motion and R&R Deadlines/Hearings AND Memo Endorsement" } ], "docket_number": "1:17-cv-04974", @@ -7737,34 +7386,7 @@ "description": "", "document_number": "12", "pacer_doc_id": "127022263376", - "short_description": "~Util - Set Deadlines/Hearings" - } - ], - "docket_number": "1:17-cv-07005", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "480531", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Mull v. United States", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "12", - "pacer_doc_id": "127022263376", - "short_description": "Order" + "short_description": "~Util - Set Deadlines/Hearings AND Order" } ], "docket_number": "1:17-cv-07005", @@ -8898,34 +8520,7 @@ "description": "", "document_number": "94", "pacer_doc_id": "127022263220", - "short_description": "~Util - Set Deadlines/Hearings" - } - ], - "docket_number": "1:16-cv-03780", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "457783", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Collins v. 
Travers Fine Jewels Inc.", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "94", - "pacer_doc_id": "127022263220", - "short_description": "Order" + "short_description": "~Util - Set Deadlines/Hearings AND Order" } ], "docket_number": "1:16-cv-03780", @@ -9033,34 +8628,7 @@ "description": "", "document_number": "80", "pacer_doc_id": "127022263207", - "short_description": "~Util - Set Deadlines" - } - ], - "docket_number": "1:16-cv-00132", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "451982", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Camarata v. Experian Information Solutions, Inc.", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "80", - "pacer_doc_id": "127022263207", - "short_description": "Stipulation and Order" + "short_description": "~Util - Set Deadlines AND Stipulation and Order" } ], "docket_number": "1:16-cv-00132", @@ -9087,34 +8655,7 @@ "description": "", "document_number": "19", "pacer_doc_id": "127022263204", - "short_description": "Extension of Time to File Document" - } - ], - "docket_number": "1:15-cv-04455", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "443199", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Peralta v. 
City Of New York", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "19", - "pacer_doc_id": "127022263204", - "short_description": "Conference" + "short_description": "Extension of Time to File Document AND Conference" } ], "docket_number": "1:15-cv-04455", From 6ce975c69dfaca0f7803c51ba06c2ad4ba4812ea Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Fri, 18 May 2018 01:07:31 -0400 Subject: [PATCH 14/15] rss_feeds.py:data() improve docstring Update per https://github.com/freelawproject/juriscraper/pull/217#commitcomment-29028340 --- juriscraper/pacer/rss_feeds.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index bf81859ec..353c2d19f 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -99,8 +99,13 @@ def _parse_text(self, text): @property def data(self): - """Return a list of docket-like objects instead of the usual dict that - is usually provided by the BaseDocketReport superclass. + """Return a list of docket-like objects, rather than a single docket + with many entries. This allows CourtListener's merging code to + process separate dockets, which it already knows how to do, + rather than having to learn how to manage updating multiple + cases from a docket containing different cases, as it would be + if this class returned a docket with all the entries from the + RSS feed, as provided by the BaseDocketReport superclass. 
When CMECF generates the RSS feed, it breaks up items with multiple consecutive entries into multiple RSS items with From 7daede309eca62ff63a34dbd80f93877fff40655 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Fri, 18 May 2018 02:54:37 -0400 Subject: [PATCH 15/15] rss_feeds.py/data(): entry -> item It's super-confusing to talk about "docket entries" as well as "RSS entries" and variables called "entry" don't help disambiguate this. Unfortunately we can't do much about the fact that feedparser's dict of items is `.entries` but hopefully this is still a readability improvement. --- juriscraper/pacer/rss_feeds.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index 353c2d19f..5ed02b6a3 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -116,29 +116,29 @@ def data(self): return self._data data_list = [] - for previous_entry, entry, next_entry in previous_and_next( + for previous_item, item, next_item in previous_and_next( self.feed.entries): - data = self.metadata(entry) + data = self.metadata(item) # We are guaranteed to only have a single docket entry for each # RSS item, and thus we use data['docket_entries'][0] below. # Coming up with an alternative data representation here and # then transforming it into what CL expects after we're done # iterating over the list is just not worth the bother. - data[u'docket_entries'] = self.docket_entries(entry) + data[u'docket_entries'] = self.docket_entries(item) # BUT: Guarantee this condition persists into the future: assert len(data[u'docket_entries']) <= 1 - # If this entry and the immediately prior entry match + # If this item and the immediately prior item match # in metadata, then add the current description to - # the previous entry's and continue the loop. + # the previous item's and continue the loop. 
if ( data_list and data_list[-1][u'docket_entries'] and data[u'docket_entries'] - and entry.title == previous_entry.title - and entry.link == previous_entry.link - and entry.id == previous_entry.id - and entry.published == previous_entry.published + and item.title == previous_item.title + and item.link == previous_item.link + and item.id == previous_item.id + and item.published == previous_item.published ): data_list[-1][u'docket_entries'][0][u'short_description'] += ( ' AND ' + @@ -146,7 +146,7 @@ def data(self): continue data[u'parties'] = None - data[u'docket_entries'] = self.docket_entries(entry) + data[u'docket_entries'] = self.docket_entries(item) if data[u'docket_entries'] and data[u'docket_number']: data_list.append(data)