Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(recap): Enable appellate PDF purchases #4948

Merged
merged 10 commits into from
Jan 28, 2025
30 changes: 22 additions & 8 deletions cl/corpus_importer/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2005,12 +2005,21 @@ def download_pacer_pdf_by_rd(
s = ProxyPacerSession(
cookies=session_data.cookies, proxy=session_data.proxy_address
)
report = FreeOpinionReport(pacer_court_id, s)

r, r_msg = report.download_pdf(
pacer_case_id, pacer_doc_id, magic_number, de_seq_num=de_seq_num
)

if is_appellate_court(pacer_court_id):
report = AppellateDocketReport(pacer_court_id, s)
pacer_doc_id = (
pacer_doc_id
if not rd.attachment_number
else int(f"{str(pacer_doc_id)[:3]}1{str(pacer_doc_id)[4:]}")
albertisfu marked this conversation as resolved.
Show resolved Hide resolved
)
r, r_msg = report.download_pdf(
pacer_doc_id=pacer_doc_id, pacer_case_id=pacer_case_id
)
else:
report = FreeOpinionReport(pacer_court_id, s)
r, r_msg = report.download_pdf(
pacer_case_id, pacer_doc_id, magic_number, de_seq_num=de_seq_num
)
return r, r_msg


Expand Down Expand Up @@ -2208,8 +2217,13 @@ def update_rd_metadata(

rd = RECAPDocument.objects.get(pk=rd_pk)
if pdf_bytes is None:
if r_msg:
# Send a specific message all the way from Juriscraper
if r_msg and "An attachment page was returned instead" in r_msg:
msg = (
"This PACER document is part of an attachment page. "
"Our system currently lacks the metadata for this attachment. "
"Please purchase the attachment page and try again."
)
elif r_msg:
msg = f"{r_msg}: {court_id=}, {rd_pk=}"
else:
msg = (
Expand Down
7 changes: 6 additions & 1 deletion cl/recap/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1894,6 +1894,11 @@ def fetch_pacer_doc_by_rd(
self.request.chain = None
return

if rd.is_acms_document():
msg = "ACMS documents are not currently supported"
mark_fq_status(fq, msg, PROCESSING_STATUS.FAILED)
return

session_data = get_pacer_cookie_from_cache(fq.user_id)
if not session_data:
msg = "Unable to find cached cookies. Aborting request."
Expand Down Expand Up @@ -1995,7 +2000,7 @@ def fetch_attachment_page(self: Task, fq_pk: int) -> None:
mark_fq_status(fq, msg, PROCESSING_STATUS.NEEDS_INFO)
return

if rd.pacer_doc_id.count("-") > 1:
if rd.is_acms_document():
msg = "ACMS attachment pages are not currently supported"
mark_fq_status(fq, msg, PROCESSING_STATUS.FAILED)
return
Expand Down
204 changes: 197 additions & 7 deletions cl/recap/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
from cl.recap.tasks import (
create_or_merge_from_idb_chunk,
do_pacer_fetch,
download_pacer_pdf_by_rd,
fetch_pacer_doc_by_rd,
get_and_copy_recap_attachment_docs,
process_recap_acms_appellate_attachment,
Expand Down Expand Up @@ -1689,10 +1690,6 @@ def test_key_serialization_with_client_code(self, mock) -> None:
)


@mock.patch(
"cl.corpus_importer.tasks.FreeOpinionReport",
new=fakes.FakeFreeOpinionReport,
)
@mock.patch("cl.recap.tasks.get_pacer_cookie_from_cache")
@mock.patch(
"cl.recap.tasks.is_pacer_court_accessible",
Expand All @@ -1702,7 +1699,9 @@ class RecapPdfFetchApiTest(TestCase):
"""Can we fetch PDFs properly?"""

def setUp(self) -> None:
self.district_court = CourtFactory(jurisdiction=Court.FEDERAL_DISTRICT)
self.docket = DocketFactory(
court=self.district_court,
case_name="United States v. Curlin",
case_name_short="Curlin",
pacer_case_id="28766",
Expand Down Expand Up @@ -1731,16 +1730,58 @@ def setUp(self) -> None:
recap_document_id=self.rd.pk,
)

self.appellate_court = CourtFactory(
id="ca1", jurisdiction=Court.FEDERAL_APPELLATE
)
self.appellate_docket = DocketFactory(
source=Docket.RECAP,
court=self.appellate_court,
pacer_case_id="41651",
)
self.appellate_rd = RECAPDocumentFactory(
docket_entry=DocketEntryWithParentsFactory(
docket=self.appellate_docket, entry_number=1208699339
),
document_number=1208699339,
pacer_doc_id="1208699339",
is_available=True,
page_count=15,
document_type=RECAPDocument.PACER_DOCUMENT,
ocr_status=4,
)
self.appellate_fq = PacerFetchQueue.objects.create(
user=User.objects.get(username="recap"),
request_type=REQUEST_TYPE.PDF,
recap_document_id=self.appellate_rd.pk,
)

def tearDown(self) -> None:
    """Flag every RECAPDocument as available again and reload ``self.rd``."""
    # Bulk update first, then refresh so the in-memory instance matches
    # what is stored.
    RECAPDocument.objects.update(is_available=True)
    self.rd.refresh_from_db()

@mock.patch(
"cl.corpus_importer.tasks.FreeOpinionReport",
new=fakes.FakeFreeOpinionReport,
)
@mock.patch(
"cl.lib.storage.get_name_by_incrementing",
side_effect=clobbering_get_name,
)
def test_fetch_unavailable_pdf(
self, mock_get_cookie, mock_get_name, mock_court_accessible
@mock.patch(
"cl.recap.tasks.download_pacer_pdf_by_rd",
wraps=download_pacer_pdf_by_rd,
)
@mock.patch(
"cl.corpus_importer.tasks.is_appellate_court",
wraps=is_appellate_court,
)
def test_fetch_unavailable_pdf_district(
self,
mock_court_check,
mock_download_method,
mock_get_name,
mock_court_accessible,
mock_get_cookies,
):
"""Can we do a simple fetch of a PDF from PACER?"""
self.rd.is_available = False
Expand All @@ -1749,12 +1790,33 @@ def test_fetch_unavailable_pdf(
self.assertFalse(self.rd.is_available)
result = do_pacer_fetch(self.fq)
result.get()

# Verify that the download helper is invoked exactly once (ideal
# scenario) with the anticipated district record data.
mock_download_method.assert_called_once()
mock_download_method.assert_called_with(
self.rd.pk,
self.docket.pacer_case_id,
self.rd.pacer_doc_id,
ANY,
None,
de_seq_num=None,
)

# Verify court validation calls with expected court ID
court_id = self.district_court.id
mock_court_check.assert_called_with(court_id)

self.fq.refresh_from_db()
self.rd.refresh_from_db()
self.assertEqual(self.fq.status, PROCESSING_STATUS.SUCCESSFUL)
self.assertTrue(self.rd.is_available)

def test_fetch_available_pdf(self, mock_get_cookie, mock_court_accessible):
@mock.patch(
"cl.corpus_importer.tasks.FreeOpinionReport",
new=fakes.FakeFreeOpinionReport,
)
def test_fetch_available_pdf(self, mock_court_accessible, mock_get_cookie):
orig_date_modified = self.rd.date_modified

response = fetch_pacer_doc_by_rd(self.rd.pk, self.fq.pk)
Expand All @@ -1774,6 +1836,134 @@ def test_fetch_available_pdf(self, mock_get_cookie, mock_court_accessible):
msg="rd updated even though it was available.",
)

@mock.patch(
    "cl.corpus_importer.tasks.AppellateDocketReport",
    new=fakes.FakeFreeOpinionReport,
)
@mock.patch(
    "cl.lib.storage.get_name_by_incrementing",
    side_effect=clobbering_get_name,
)
@mock.patch(
    "cl.recap.tasks.download_pacer_pdf_by_rd",
    wraps=download_pacer_pdf_by_rd,
)
@mock.patch(
    "cl.corpus_importer.tasks.is_appellate_court",
    wraps=is_appellate_court,
)
def test_fetch_unavailable_pdf_appellate(
    self,
    mock_court_check,
    mock_download_method,
    mock_get_name,
    mock_court_accessible,
    mock_get_cookies,
):
    """Can we do a simple fetch of an appellate PDF from PACER?"""
    self.appellate_rd.is_available = False
    self.appellate_rd.save()

    self.assertFalse(self.appellate_rd.is_available)
    result = do_pacer_fetch(self.appellate_fq)
    result.get()

    # Verify that the download helper is invoked exactly once (ideal
    # scenario) with the anticipated appellate record data. The single
    # assert_called_once_with covers both the call count and the arguments.
    mock_download_method.assert_called_once_with(
        self.appellate_rd.pk,
        self.appellate_docket.pacer_case_id,
        self.appellate_rd.pacer_doc_id,
        ANY,
        None,
        de_seq_num=None,
    )

    # Verify court validation calls with expected court ID
    court_id = self.appellate_court.id
    mock_court_check.assert_called_with(court_id)

    self.appellate_fq.refresh_from_db()
    self.appellate_rd.refresh_from_db()
    self.assertEqual(
        self.appellate_fq.status, PROCESSING_STATUS.SUCCESSFUL
    )
    self.assertTrue(self.appellate_rd.is_available)

def test_avoid_purchasing_acms_document(
    self, mock_court_accessible, mock_get_cookies
):
    """Fetching a document with an ACMS-style pacer_doc_id must fail with
    the "not currently supported" message.
    """
    acms_entry = DocketEntryWithParentsFactory(docket=DocketFactory())
    acms_doc = RECAPDocumentFactory(
        docket_entry=acms_entry,
        pacer_doc_id="784459c4-e2cd-ef11-b8e9-001dd804c0b4",
    )
    recap_user = User.objects.get(username="recap")
    acms_request = PacerFetchQueue.objects.create(
        user=recap_user,
        request_type=REQUEST_TYPE.PDF,
        recap_document_id=acms_doc.pk,
    )

    fetch_pacer_doc_by_rd(acms_doc.pk, acms_request.pk)

    # Reload the queue row to observe what the task stored on it.
    acms_request.refresh_from_db()
    self.assertEqual(acms_request.status, PROCESSING_STATUS.FAILED)
    self.assertIn(
        "ACMS documents are not currently supported",
        acms_request.message,
    )

@mock.patch(
    "cl.recap.tasks.download_pacer_pdf_by_rd",
    return_value=(None, "Unable to download PDF."),
)
def test_handle_failed_purchases(
    self, mock_download_method, mock_court_accessible, mock_get_cookies
):
    """Is the fetch queue item marked as failed, with the downloader's
    message, when no PDF comes back?
    """
    doc, queue_item = self.rd, self.fq

    # Make the document look unavailable before the fetch runs.
    doc.is_available = False
    doc.save()
    self.assertFalse(doc.is_available)

    fetch_pacer_doc_by_rd(doc.pk, queue_item.pk)

    queue_item.refresh_from_db()
    self.assertEqual(queue_item.status, PROCESSING_STATUS.FAILED)
    self.assertIn("Unable to download PDF.", queue_item.message)

@mock.patch(
    "cl.recap.tasks.download_pacer_pdf_by_rd",
    return_value=(
        None,
        "Unable to download PDF. An attachment page was returned instead.",
    ),
)
def test_add_custom_message_for_attachment_pages(
    self, mock_download_method, mock_court_accessible, mock_get_cookies
):
    """
    Checks that the fetch queue item carries the custom attachment-page
    message when download_pacer_pdf_by_rd reports that an attachment page
    was returned instead of a PDF.
    """
    expected_message = (
        "This PACER document is part of an attachment page. "
        "Our system currently lacks the metadata for this attachment. "
        "Please purchase the attachment page and try again."
    )
    doc, queue_item = self.appellate_rd, self.appellate_fq

    # Make the document look unavailable before the fetch runs.
    doc.is_available = False
    doc.save()
    self.assertFalse(doc.is_available)

    fetch_pacer_doc_by_rd(doc.pk, queue_item.pk)

    queue_item.refresh_from_db()
    self.assertEqual(queue_item.status, PROCESSING_STATUS.FAILED)
    self.assertEqual(expected_message, queue_item.message)


@mock.patch(
"cl.recap.tasks.is_pacer_court_accessible",
Expand Down
15 changes: 14 additions & 1 deletion cl/search/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1383,6 +1383,19 @@ def get_authorities_url(self) -> str:
},
)

def is_acms_document(self) -> bool:
    """
    Checks if the document is from ACMS based on the number of hyphens
    in the pacer_doc_id.

    ACMS documents are currently the only ones using hyphens in their
    doc_id (e.g. "784459c4-e2cd-ef11-b8e9-001dd804c0b4").

    :return: True if the doc_id contains more than one hyphen, False
    otherwise (including when the doc_id is empty or None).
    """
    doc_id = self.pacer_doc_id
    # A missing doc_id cannot be an ACMS one; answer False instead of
    # raising AttributeError on None.
    return bool(doc_id) and doc_id.count("-") > 1

@property
def pacer_url(self) -> str | None:
"""Construct a doc1 URL for any item, if we can. Else, return None."""
Expand All @@ -1391,7 +1404,7 @@ def pacer_url(self) -> str | None:
court = self.docket_entry.docket.court
court_id = map_cl_to_pacer_id(court.pk)
if self.pacer_doc_id:
if self.pacer_doc_id.count("-") > 1:
if self.is_acms_document():
return (
f"https://{court_id}-showdoc.azurewebsites.us/docs/"
f"{self.docket_entry.docket.pacer_case_id}/"
Expand Down
Loading