Add APD link to doc search on Arabic/JA query (#1697)

Princeton-CDH · Jan 28, 2025 · 1bdaabc · 1bdaabc
1 parent 2ed1825
commit 1bdaabc
Show file tree

Hide file tree

Showing 4 changed files with 55 additions and 0 deletions.
diff --git a/geniza/corpus/templates/corpus/document_list.html b/geniza/corpus/templates/corpus/document_list.html
@@ -157,6 +157,12 @@ <h2>
             {% if is_paginated %}
                 {% include "corpus/snippets/pagination.html" %}
             {% endif %}
+            {# apd_link is empty unless the search query contains Arabic or Hebrew script, so the link is omitted for other queries #}
+            {% if apd_link %}
+                <a id="apd" href="{{ apd_link }}">
+                    {# translators: Link to search a document query on the Arabic Papyrology Database #}
+                    {% translate 'View results in the Arabic Papyrology Database' %}
+                </a>
+            {% endif %}
         </section>
     </form>
 {% endblock main %}
diff --git a/geniza/corpus/tests/test_corpus_views.py b/geniza/corpus/tests/test_corpus_views.py
@@ -1339,6 +1339,20 @@ def test_hebrew_prefix_highlight(self, source, empty_solr):
             0
         ] == clean_html("<em>מרכב</em>")
 
+    def test_get_apd_link(self):
+        dsv = DocumentSearchView(kwargs={})
+
+        # no arabic or ja: bail out
+        assert not dsv.get_apd_link(None)
+        assert not dsv.get_apd_link("test")
+
+        # arabic: leave as is
+        arabic = "العبد"
+        assert dsv.get_apd_link(arabic) == f"{dsv.apd_base_url}{arabic}"
+
+        # JA: translate with regex
+        assert dsv.get_apd_link("ואגב") == f"{dsv.apd_base_url}وا[غج]ب"
+
 
 class TestDocumentScholarshipView:
     def test_page_title(self, document, client, source):

diff --git a/geniza/corpus/views.py b/geniza/corpus/views.py
@@ -34,6 +34,7 @@
 from geniza.common.utils import absolutize_url
 from geniza.corpus import iiif_utils
 from geniza.corpus.forms import DocumentMergeForm, DocumentSearchForm, TagMergeForm
+from geniza.corpus.ja import contains_arabic, contains_hebrew, ja_arabic_chars
 from geniza.corpus.models import Document, TextBlock
 from geniza.corpus.solr_queryset import DocumentSolrQuerySet
 from geniza.corpus.templatetags import corpus_extras
@@ -353,6 +354,26 @@ def get_paginate_by(self, queryset):
                 pass
         return paginate_by
 
+    # Base url for Arabic Papyrology Database (APD) searches. It ends with an
+    # empty ``searchwordstring1=`` parameter so that get_apd_link can append
+    # the search term directly to the end of the string.
+    apd_base_url = "https://www.apd.gwi.uni-muenchen.de/apd/asearch.jsp?searchtable1=601&showdwords=true&searchwordstring1="
+
+    def get_apd_link(self, query):
+        """Generate a link to the Arabic Papyrology Database (APD) search page
+        using the entered query, converting any Hebrew script to Arabic with Regex"""
+        if not query or not (contains_arabic(query) or contains_hebrew(query)):
+            # if no arabic OR hebrew in query, bail out
+            return None
+        # simplified version of ja_to_arabic that uses regex instead of solr OR
+        for k, v in ja_arabic_chars.items():
+            if type(v) == list:
+                # list means there is more than one option, so join options with regex
+                query = re.sub(k, f"[{''.join(v)}]", query)
+            elif type(v) == str:
+                # only one possible translation
+                query = re.sub(k, v, query)
+        query = query.strip()
+        return f"{self.apd_base_url}{query}"
+
     def get_context_data(self, **kwargs):
         """extend context data to add page metadata, highlighting,
         and update form with facets"""
@@ -387,6 +408,7 @@ def get_context_data(self, **kwargs):
                 "page_includes_transcriptions": True,  # preload transcription font
                 "highlighting": highlights,
                 "applied_filters": self.applied_filter_labels,
+                "apd_link": self.get_apd_link(context_data["form"].data.get("q", None)),
             }
         )
 

diff --git a/sitemedia/scss/components/_results.scss b/sitemedia/scss/components/_results.scss
@@ -34,6 +34,19 @@ section#document-list {
             margin-top: spacing.$spacing-md;
         }
     }
+    // link with id "apd" inside the document list section: declares centered
+    // text alignment and a vertical margin with no horizontal margin
+    a#apd {
+        text-align: center;
+        margin: 1.5rem 0;
+        // larger vertical margin from the tablet-landscape breakpoint up
+        @include breakpoints.for-tablet-landscape-up {
+            margin: 2.25rem 0;
+        }
+    }
+    // when the link is the element immediately following nav.pagination
+    // (adjacent sibling selector), remove its margin, both by default and
+    // from the tablet-landscape breakpoint up
+    nav.pagination + a#apd {
+        margin: 0;
+        @include breakpoints.for-tablet-landscape-up {
+            margin: 0;
+        }
+    }
 }
 
 // single result