Skip to content

Commit

Permalink
feat(annotate_citations): Add test for unsafe characters
Browse files Browse the repository at this point in the history
Aria description needs to be safely handled
Escape case names and add tests
  • Loading branch information
flooie committed Jan 23, 2025
1 parent 0368e51 commit 7b73ed0
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 1 deletion.
3 changes: 2 additions & 1 deletion cl/citations/annotate_citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,11 @@ def generate_annotations(
]
else: # If successfully matched...
case_name = trunc(best_case_name(opinion.cluster), 60, "...")
safe_case_name = html.escape(case_name)
annotation = [
f'<span class="citation" data-id="{opinion.pk}">'
f'<a href="{opinion.cluster.get_absolute_url()}"'
f' aria-description="Citation for case: {case_name}"'
f' aria-description="Citation for case: {safe_case_name}"'
">",
"</a></span>",
]
Expand Down
48 changes: 48 additions & 0 deletions cl/citations/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import time_machine
from asgiref.sync import async_to_sync, sync_to_async
from bs4 import BeautifulSoup
from django.contrib.auth.hashers import make_password
from django.core.cache import cache as default_cache
from django.core.management import call_command
Expand Down Expand Up @@ -370,6 +371,53 @@ def test_make_html_from_matched_citation_objects(self) -> None:
msg=f"\n{created_html}\n\n !=\n\n{expected_html}",
)

def test_unsafe_case_names(self) -> None:
    """Test unsafe characters in aria descriptions.

    Each entry pairs a raw case name with the ``aria-description`` value
    expected on the generated citation link after the created HTML has
    been parsed back with BeautifulSoup.
    """
    case_names = [
        (
            # ampersand
            "Farmers ' High Line Canal & Reservoir Co. v. New Hampshire Real Estate Co.",
            "Citation for case: Farmers ' High Line Canal & Reservoir Co. v. New...",
        ),
        (
            # single quote
            "Barmore v '",
            "Citation for case: Barmore v '",
        ),
        (
            # Question mark, and double quotes with single quotes
            """Shamokin, Pa.", (Leaflet in Case) Misnamed? ',""",
            """Citation for case: Shamokin, Pa.", (Leaflet in Case) Misnamed? ',""",
        ),
    ]
    for case_name, expected_aria in case_names:
        # Run every case as its own subtest so that one failure neither
        # hides the remaining cases nor leaves it unclear which case name
        # produced the bad markup.
        with self.subTest(case_name=case_name):
            plain_text = "foo v. bar, 1 U.S. 1 baz"
            opinion = Opinion(
                plain_text=plain_text,
                pk="MATCH_ID",
                cluster=Mock(OpinionCluster(id=1234), case_name=case_name),
            )
            get_and_clean_opinion_text(opinion)
            citations = get_citations(
                opinion.cleaned_text, tokenizer=HYPERSCAN_TOKENIZER
            )
            opinion.cluster.get_absolute_url.return_value = "/opinion/1/foo/"
            # The opinion is resolved against itself so the citation is
            # treated as matched and an annotated link is generated.
            citation_resolutions = {opinion: citations}
            created_html = create_cited_html(opinion, citation_resolutions)

            # Extract the aria description from the first link carrying one;
            # None signals that no such link was produced at all.
            soup = BeautifulSoup(created_html, "html.parser")
            citation_link = soup.find("a", {"aria-description": True})
            aria_description = (
                citation_link["aria-description"] if citation_link else None
            )

            self.assertEqual(
                aria_description,
                expected_aria,
                msg=f"\n{aria_description}\n\n !=\n\n{expected_aria}",
            )


class RECAPDocumentObjectTest(ESIndexTestCase, TestCase):
# pass
Expand Down

0 comments on commit 7b73ed0

Please sign in to comment.