diff --git a/juriscraper/pacer/rss_feeds.py b/juriscraper/pacer/rss_feeds.py index f7124cc25..5ed02b6a3 100644 --- a/juriscraper/pacer/rss_feeds.py +++ b/juriscraper/pacer/rss_feeds.py @@ -12,6 +12,7 @@ from ..lib.html_utils import html_unescape from ..lib.log_tools import make_default_logger from ..lib.string_utils import harmonize, clean_string +from ..lib.utils import previous_and_next logger = make_default_logger() @@ -98,19 +99,57 @@ def _parse_text(self, text): @property def data(self): - """Override this to create a list of docket-like objects instead of the - usual dict that is usually provided by the docket report. + """Return a list of docket-like objects, rather than a single docket + with many entries. This allows CourtListener's merging code to + process separate dockets, which it already knows how to do, + rather than having to learn how to manage updating multiple + cases from a docket containing different cases, as it would be + if this class returned a docket with all the entries from the + RSS feed, as provided by the BaseDocketReport superclass. + + When CMECF generates the RSS feed, it breaks up items with + multiple consecutive entries into multiple RSS items with + identical timestamp/id/title. We reverse that and recombine + those items. """ if self._data is not None: return self._data data_list = [] - for entry in self.feed.entries: - data = self.metadata(entry) + for previous_item, item, next_item in previous_and_next( + self.feed.entries): + data = self.metadata(item) + + # We are guaranteed to only have a single docket entry for each + # RSS item, and thus we use data['docket_entries'][0] below. + # Coming up with an alternative data representation here and + # then transforming it into what CL expects after we're done + # iterating over the list is just not worth the bother. 
+ data[u'docket_entries'] = self.docket_entries(item) + # BUT: Guarantee this condition persists into the future: + assert len(data[u'docket_entries']) <= 1 + + # If this item and the immediately prior item match + # in metadata, then add the current description to + # the previous item's and continue the loop. + if ( + data_list and data_list[-1][u'docket_entries'] + and data[u'docket_entries'] + and item.title == previous_item.title + and item.link == previous_item.link + and item.id == previous_item.id + and item.published == previous_item.published + ): + data_list[-1][u'docket_entries'][0][u'short_description'] += ( + ' AND ' + + data[u'docket_entries'][0][u'short_description']) + continue + data[u'parties'] = None - data[u'docket_entries'] = self.docket_entries(entry) - if data[u'docket_entries'] and data['docket_number']: + data[u'docket_entries'] = self.docket_entries(item) + if data[u'docket_entries'] and data[u'docket_number']: data_list.append(data) + self._data = data_list return data_list @@ -146,7 +185,7 @@ def docket_entries(self, entry): u'date_filed': date(*entry.published_parsed[:3]), u'document_number': self._get_value(self.document_number_regex, entry.summary), - u'description': '', + u'description': u'', u'short_description': html_unescape( self._get_value(self.short_desc_regex, entry.summary)), } diff --git a/tests/examples/pacer/rss_feeds/nysb_1.json b/tests/examples/pacer/rss_feeds/nysb_1.json index 52881b7f9..63e55179e 100644 --- a/tests/examples/pacer/rss_feeds/nysb_1.json +++ b/tests/examples/pacer/rss_feeds/nysb_1.json @@ -717,34 +717,7 @@ "description": "", "document_number": "47", "pacer_doc_id": "126018830304", - "short_description": "Motion, Redact (Fee) (NOT to be used for redacting in Transcripts)" - } - ], - "docket_number": "16-35015", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "263474", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Angela 
S. Bittencourt", - "cause": "", - "court_id": "nysb", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-19", - "description": "", - "document_number": "47", - "pacer_doc_id": "126018830304", - "short_description": "Motion, Redact (Fee) (NOT to be used for redacting in Transcripts)" + "short_description": "Motion, Redact (Fee) (NOT to be used for redacting in Transcripts) AND Motion, Redact (Fee) (NOT to be used for redacting in Transcripts)" } ], "docket_number": "16-35015", diff --git a/tests/examples/pacer/rss_feeds/sdny_1.json b/tests/examples/pacer/rss_feeds/sdny_1.json index bd92cabd6..37d02ddc5 100644 --- a/tests/examples/pacer/rss_feeds/sdny_1.json +++ b/tests/examples/pacer/rss_feeds/sdny_1.json @@ -2499,34 +2499,7 @@ "description": "", "document_number": "73", "pacer_doc_id": "127022264019", - "short_description": "~Util - Add and Terminate Attorneys" - } - ], - "docket_number": "7:16-cv-02451", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "455612", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Smythe v. 
City of Yonkers", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "73", - "pacer_doc_id": "127022264019", - "short_description": "Stipulation and Order" + "short_description": "~Util - Add and Terminate Attorneys AND Stipulation and Order" } ], "docket_number": "7:16-cv-02451", @@ -2931,34 +2904,7 @@ "description": "", "document_number": "29", "pacer_doc_id": "127022263966", - "short_description": "~Util - Set Deadlines" - } - ], - "docket_number": "7:17-cv-06053", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "478821", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Hatches v. Cipollini", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "29", - "pacer_doc_id": "127022263966", - "short_description": "Order" + "short_description": "~Util - Set Deadlines AND Order" } ], "docket_number": "7:17-cv-06053", @@ -3282,34 +3228,7 @@ "description": "", "document_number": "62", "pacer_doc_id": "127022263926", - "short_description": "~Util - Set Deadlines" - } - ], - "docket_number": "7:17-cv-05440", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "477713", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Drayton v. 
Young", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "62", - "pacer_doc_id": "127022263926", - "short_description": "Memo Endorsement" + "short_description": "~Util - Set Deadlines AND Memo Endorsement" } ], "docket_number": "7:17-cv-05440", @@ -3606,34 +3525,7 @@ "description": "", "document_number": "23", "pacer_doc_id": "127022263887", - "short_description": "~Util - Set Hearings" - } - ], - "docket_number": "1:18-cv-02786", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "491087", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Best v. Layne", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "23", - "pacer_doc_id": "127022263887", - "short_description": "Order" + "short_description": "~Util - Set Hearings AND Order" } ], "docket_number": "1:18-cv-02786", @@ -4821,34 +4713,7 @@ "description": "", "document_number": "337", "pacer_doc_id": "127022263733", - "short_description": "~Util - Set Hearings" - } - ], - "docket_number": "1:16-md-02742", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "463632", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "In re: SunEdison, Inc., Securities Litigation", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "337", - "pacer_doc_id": "127022263733", - 
"short_description": "Order" + "short_description": "~Util - Set Hearings AND Order" } ], "docket_number": "1:16-md-02742", @@ -5226,34 +5091,7 @@ "description": "", "document_number": "13", "pacer_doc_id": "127022263695", - "short_description": "~Util - Add and Terminate Parties" - } - ], - "docket_number": "1:18-cv-02441", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "490430", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Borges v. Municipal Credit Union", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "13", - "pacer_doc_id": "127022263695", - "short_description": "Notice of Voluntary Dismissal - Signed" + "short_description": "~Util - Add and Terminate Parties AND Notice of Voluntary Dismissal - Signed" } ], "docket_number": "1:18-cv-02441", @@ -5550,34 +5388,7 @@ "description": "", "document_number": "27", "pacer_doc_id": "127022263655", - "short_description": "~Util - Set Deadlines" - } - ], - "docket_number": "1:17-cv-09841", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "485500", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Benedetto v. 
209 Grub LLC", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "27", - "pacer_doc_id": "127022263655", - "short_description": "Order" + "short_description": "~Util - Set Deadlines AND Order" } ], "docket_number": "1:17-cv-09841", @@ -5631,34 +5442,7 @@ "description": "", "document_number": "15", "pacer_doc_id": "127022263645", - "short_description": "~Util - Set Deadlines" - } - ], - "docket_number": "1:17-cv-10085", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "485885", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Aude v. Kobe Steel, Ltd.", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "15", - "pacer_doc_id": "127022263645", - "short_description": "Stipulation and Order" + "short_description": "~Util - Set Deadlines AND Stipulation and Order" } ], "docket_number": "1:17-cv-10085", @@ -5820,34 +5604,7 @@ "description": "", "document_number": "42", "pacer_doc_id": "127022263619", - "short_description": "Discovery" - } - ], - "docket_number": "1:17-cv-06954", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "480431", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Thomas v. 
River Greene Construction Group LLC", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "42", - "pacer_doc_id": "127022263619", - "short_description": "Compel" + "short_description": "Discovery AND Compel" } ], "docket_number": "1:17-cv-06954", @@ -6117,34 +5874,7 @@ "description": "", "document_number": "10", "pacer_doc_id": "127022263587", - "short_description": "1 - Terminate Hearings" - } - ], - "docket_number": "1:18-cv-00069", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "486234", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Herbst v. Best Buy Co. Inc.", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "10", - "pacer_doc_id": "127022263587", - "short_description": "Order" + "short_description": "1 - Terminate Hearings AND Order" } ], "docket_number": "1:18-cv-00069", @@ -6468,34 +6198,7 @@ "description": "", "document_number": "240", "pacer_doc_id": "127022263533", - "short_description": "~Util - Set Deadlines/Hearings" - } - ], - "docket_number": "1:10-cv-09545", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "403622", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Baines v. 
The City of New York", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "240", - "pacer_doc_id": "127022263533", - "short_description": "Order" + "short_description": "~Util - Set Deadlines/Hearings AND Order" } ], "docket_number": "1:10-cv-09545", @@ -6981,34 +6684,7 @@ "description": "", "document_number": "8", "pacer_doc_id": "127022263460", - "short_description": "~Util - Set Deadlines/Hearings" - } - ], - "docket_number": "1:18-cv-01932", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "489634", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Olsen v. Macaron Cafe, LLC", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "8", - "pacer_doc_id": "127022263460", - "short_description": "Memo Endorsement" + "short_description": "~Util - Set Deadlines/Hearings AND Memo Endorsement" } ], "docket_number": "1:18-cv-01932", @@ -7332,34 +7008,7 @@ "description": "", "document_number": "63", "pacer_doc_id": "127022263420", - "short_description": "~Util - Set Motion and R&R Deadlines/Hearings" - } - ], - "docket_number": "1:17-cv-04974", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "477077", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Krondes v. 
Nationstar Mortgage, LLC", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "63", - "pacer_doc_id": "127022263420", - "short_description": "Memo Endorsement" + "short_description": "~Util - Set Motion and R&R Deadlines/Hearings AND Memo Endorsement" } ], "docket_number": "1:17-cv-04974", @@ -7737,34 +7386,7 @@ "description": "", "document_number": "12", "pacer_doc_id": "127022263376", - "short_description": "~Util - Set Deadlines/Hearings" - } - ], - "docket_number": "1:17-cv-07005", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "480531", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Mull v. United States", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "12", - "pacer_doc_id": "127022263376", - "short_description": "Order" + "short_description": "~Util - Set Deadlines/Hearings AND Order" } ], "docket_number": "1:17-cv-07005", @@ -8898,34 +8520,7 @@ "description": "", "document_number": "94", "pacer_doc_id": "127022263220", - "short_description": "~Util - Set Deadlines/Hearings" - } - ], - "docket_number": "1:16-cv-03780", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "457783", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Collins v. 
Travers Fine Jewels Inc.", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "94", - "pacer_doc_id": "127022263220", - "short_description": "Order" + "short_description": "~Util - Set Deadlines/Hearings AND Order" } ], "docket_number": "1:16-cv-03780", @@ -9033,34 +8628,7 @@ "description": "", "document_number": "80", "pacer_doc_id": "127022263207", - "short_description": "~Util - Set Deadlines" - } - ], - "docket_number": "1:16-cv-00132", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "451982", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Camarata v. Experian Information Solutions, Inc.", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "80", - "pacer_doc_id": "127022263207", - "short_description": "Stipulation and Order" + "short_description": "~Util - Set Deadlines AND Stipulation and Order" } ], "docket_number": "1:16-cv-00132", @@ -9087,34 +8655,7 @@ "description": "", "document_number": "19", "pacer_doc_id": "127022263204", - "short_description": "Extension of Time to File Document" - } - ], - "docket_number": "1:15-cv-04455", - "jurisdiction": "", - "jury_demand": "", - "nature_of_suit": "", - "pacer_case_id": "443199", - "parties": null, - "referred_to_str": "" - }, - { - "assigned_to_str": "", - "case_name": "Peralta v. 
City Of New York", - "cause": "", - "court_id": "sdny", - "date_converted": null, - "date_discharged": null, - "date_filed": null, - "date_terminated": null, - "demand": "", - "docket_entries": [ - { - "date_filed": "2018-04-17", - "description": "", - "document_number": "19", - "pacer_doc_id": "127022263204", - "short_description": "Conference" + "short_description": "Extension of Time to File Document AND Conference" } ], "docket_number": "1:15-cv-04455",