diff --git a/ietf/doc/forms.py b/ietf/doc/forms.py index f77b218318..8a1e9ecb98 100644 --- a/ietf/doc/forms.py +++ b/ietf/doc/forms.py @@ -276,6 +276,7 @@ class InvestigateForm(forms.Form): ), min_length=8, ) + task_id = forms.CharField(required=False, widget=forms.HiddenInput) def clean_name_fragment(self): disallowed_characters = ["%", "/", "\\", "*"] diff --git a/ietf/doc/tasks.py b/ietf/doc/tasks.py index f1de459dd8..6eb901e6c7 100644 --- a/ietf/doc/tasks.py +++ b/ietf/doc/tasks.py @@ -31,6 +31,7 @@ generate_idnits2_rfcs_obsoleted, update_or_create_draft_bibxml_file, ensure_draft_bibxml_path_exists, + investigate_fragment, ) @@ -119,3 +120,11 @@ def generate_draft_bibxml_files_task(days=7, process_all=False): update_or_create_draft_bibxml_file(event.doc, event.rev) except Exception as err: log.log(f"Error generating bibxml for {event.doc.name}-{event.rev}: {err}") + + +@shared_task(ignore_result=False) +def investigate_fragment_task(name_fragment: str): + return { + "name_fragment": name_fragment, + "results": investigate_fragment(name_fragment), + } diff --git a/ietf/doc/tests.py b/ietf/doc/tests.py index 0630fcd8d4..f5af7bb48b 100644 --- a/ietf/doc/tests.py +++ b/ietf/doc/tests.py @@ -3280,7 +3280,8 @@ def test_investigate_fragment(self): "draft-this-should-not-be-possible-00.txt", ) - def test_investigate(self): + def test_investigate_get(self): + """GET with no querystring should retrieve the investigate UI""" url = urlreverse("ietf.doc.views_doc.investigate") login_testing_unauthorized(self, "secretary", url) r = self.client.get(url) @@ -3288,36 +3289,143 @@ def test_investigate(self): q = PyQuery(r.content) self.assertEqual(len(q("form#investigate")), 1) self.assertEqual(len(q("div#results")), 0) - r = self.client.post(url, dict(name_fragment="this-is-not-found")) + + @mock.patch("ietf.doc.views_doc.AsyncResult") + def test_investgate_get_task_id(self, mock_asyncresult): + """GET with querystring should lookup task status""" + url = urlreverse("ietf.doc.views_doc.investigate") + login_testing_unauthorized(self, "secretary", url) + mock_asyncresult.return_value.ready.return_value = True + r = self.client.get(url + "?id=a-task-id") + self.assertEqual(r.status_code, 200) + self.assertEqual(r.json(), {"status": "ready"}) + self.assertTrue(mock_asyncresult.called) + self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id")) + mock_asyncresult.reset_mock() + + mock_asyncresult.return_value.ready.return_value = False + r = self.client.get(url + "?id=a-task-id") + self.assertEqual(r.status_code, 200) + self.assertEqual(r.json(), {"status": "notready"}) + self.assertTrue(mock_asyncresult.called) + self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id")) + + @mock.patch("ietf.doc.views_doc.investigate_fragment_task") + def test_investigate_post(self, mock_investigate_fragment_task): + """POST with a name_fragment and no task_id should start a celery task""" + url = urlreverse("ietf.doc.views_doc.investigate") + login_testing_unauthorized(self, "secretary", url) + + # test some invalid cases + r = self.client.post(url, {"name_fragment": "short"}) # limit is >= 8 characters self.assertEqual(r.status_code, 200) q = PyQuery(r.content) + self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1) + self.assertFalse(mock_investigate_fragment_task.delay.called) + for char in ["*", "%", "/", "\\"]: + r = self.client.post(url, {"name_fragment": f"bad{char}character"}) + self.assertEqual(r.status_code, 200) + q = PyQuery(r.content) + self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1) + self.assertFalse(mock_investigate_fragment_task.delay.called) + + # now a valid one + mock_investigate_fragment_task.delay.return_value.id = "a-task-id" + r = self.client.post(url, {"name_fragment": "this-is-a-valid-fragment"}) + self.assertEqual(r.status_code, 200) + self.assertTrue(mock_investigate_fragment_task.delay.called) + self.assertEqual(mock_investigate_fragment_task.delay.call_args, mock.call("this-is-a-valid-fragment")) + self.assertEqual(r.json(), {"id": "a-task-id"}) + + @mock.patch("ietf.doc.views_doc.AsyncResult") + def test_investigate_post_task_id(self, mock_asyncresult): + """POST with name_fragment and task_id should retrieve results""" + url = urlreverse("ietf.doc.views_doc.investigate") + login_testing_unauthorized(self, "secretary", url) + + # First, test a non-successful result - this could be a failure or non-existent task id + mock_result = mock_asyncresult.return_value + mock_result.successful.return_value = False + r = self.client.post(url, {"name_fragment": "some-fragment", "task_id": "a-task-id"}) + self.assertContains(r, "The investigation task failed.", status_code=200) + self.assertTrue(mock_asyncresult.called) + self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id")) + self.assertFalse(mock_result.get.called) + mock_asyncresult.reset_mock() + q = PyQuery(r.content) + self.assertEqual(q("#id_name_fragment").val(), "some-fragment") + self.assertEqual(q("#id_task_id").val(), "a-task-id") + + # now the various successful result mixes + mock_result = mock_asyncresult.return_value + mock_result.successful.return_value = True + mock_result.get.return_value = { + "name_fragment": "different-fragment", + "results": { + "can_verify": set(), + "unverifiable_collections": set(), + "unexpected": set(), + } + } + r = self.client.post(url, {"name_fragment": "some-fragment", "task_id": "a-task-id"}) + self.assertEqual(r.status_code, 200) + self.assertTrue(mock_asyncresult.called) + self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id")) + mock_asyncresult.reset_mock() + q = PyQuery(r.content) + self.assertEqual(q("#id_name_fragment").val(), "different-fragment", "name_fragment should be reset") + self.assertEqual(q("#id_task_id").val(), "", "task_id should be cleared") self.assertEqual(len(q("div#results")), 1) self.assertEqual(len(q("table#authenticated")), 0) self.assertEqual(len(q("table#unverifiable")), 0) self.assertEqual(len(q("table#unexpected")), 0) - r = self.client.post(url, dict(name_fragment="mixed-provenance")) + + # This file was created in setUp. It allows the view to render properly + # but its location / content don't matter for this test otherwise. + a_file_that_exists = Path(settings.INTERNET_DRAFT_PATH) / "draft-this-is-active-00.txt" + + mock_result.get.return_value = { + "name_fragment": "different-fragment", + "results": { + "can_verify": {a_file_that_exists}, + "unverifiable_collections": {a_file_that_exists}, + "unexpected": set(), + } + } + r = self.client.post(url, {"name_fragment": "some-fragment", "task_id": "a-task-id"}) self.assertEqual(r.status_code, 200) + self.assertTrue(mock_asyncresult.called) + self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id")) + mock_asyncresult.reset_mock() q = PyQuery(r.content) + self.assertEqual(q("#id_name_fragment").val(), "different-fragment", "name_fragment should be reset") + self.assertEqual(q("#id_task_id").val(), "", "task_id should be cleared") self.assertEqual(len(q("div#results")), 1) self.assertEqual(len(q("table#authenticated")), 1) self.assertEqual(len(q("table#unverifiable")), 1) self.assertEqual(len(q("table#unexpected")), 0) - r = self.client.post(url, dict(name_fragment="not-be-possible")) + + mock_result.get.return_value = { + "name_fragment": "different-fragment", + "results": { + "can_verify": set(), + "unverifiable_collections": set(), + "unexpected": {a_file_that_exists}, + } + } + r = self.client.post(url, {"name_fragment": "some-fragment", "task_id": "a-task-id"}) self.assertEqual(r.status_code, 200) + self.assertTrue(mock_asyncresult.called) + self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id")) + mock_asyncresult.reset_mock() q = PyQuery(r.content) + self.assertEqual(q("#id_name_fragment").val(), "different-fragment", "name_fragment should be reset") + self.assertEqual(q("#id_task_id").val(), "", "task_id should be cleared") self.assertEqual(len(q("div#results")), 1) self.assertEqual(len(q("table#authenticated")), 0) self.assertEqual(len(q("table#unverifiable")), 0) self.assertEqual(len(q("table#unexpected")), 1) - r = self.client.post(url, dict(name_fragment="short")) - self.assertEqual(r.status_code, 200) - q = PyQuery(r.content) - self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1) - for char in ["*", "%", "/", "\\"]: - r = self.client.post(url, dict(name_fragment=f"bad{char}character")) - self.assertEqual(r.status_code, 200) - q = PyQuery(r.content) - self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1) + class LogIOErrorTests(TestCase): diff --git a/ietf/doc/tests_tasks.py b/ietf/doc/tests_tasks.py index 67997acd85..8a6ffa8be1 100644 --- a/ietf/doc/tests_tasks.py +++ b/ietf/doc/tests_tasks.py @@ -20,6 +20,7 @@ generate_draft_bibxml_files_task, generate_idnits2_rfcs_obsoleted_task, generate_idnits2_rfc_status_task, + investigate_fragment_task, notify_expirations_task, ) @@ -98,6 +99,18 @@ def test_expire_last_calls_task(self, mock_get_expired, mock_expire): self.assertEqual(mock_expire.call_args_list[1], mock.call(docs[1])) self.assertEqual(mock_expire.call_args_list[2], mock.call(docs[2])) + def test_investigate_fragment_task(self): + investigation_results = object() # singleton + with mock.patch( + "ietf.doc.tasks.investigate_fragment", return_value=investigation_results + ) as mock_inv: + retval = investigate_fragment_task("some fragment") + self.assertTrue(mock_inv.called) + self.assertEqual(mock_inv.call_args, mock.call("some fragment")) + self.assertEqual( + retval, {"name_fragment": "some fragment", "results": investigation_results} + ) + class Idnits2SupportTests(TestCase): settings_temp_path_overrides = TestCase.settings_temp_path_overrides + ['DERIVED_DIR'] diff --git a/ietf/doc/views_doc.py b/ietf/doc/views_doc.py index 9f7cf12bcb..591a72d907 100644 --- a/ietf/doc/views_doc.py +++ b/ietf/doc/views_doc.py @@ -41,10 +41,11 @@ from pathlib import Path +from celery.result import AsyncResult from django.core.cache import caches from django.core.exceptions import PermissionDenied from django.db.models import Max -from django.http import HttpResponse, Http404, HttpResponseBadRequest +from django.http import HttpResponse, Http404, HttpResponseBadRequest, JsonResponse from django.shortcuts import render, get_object_or_404, redirect from django.template.loader import render_to_string from django.urls import reverse as urlreverse @@ -59,8 +60,9 @@ ConsensusDocEvent, NewRevisionDocEvent, TelechatDocEvent, WriteupDocEvent, IanaExpertDocEvent, IESG_BALLOT_ACTIVE_STATES, STATUSCHANGE_RELATIONS, DocumentActionHolder, DocumentAuthor, RelatedDocument, RelatedDocHistory) +from ietf.doc.tasks import investigate_fragment_task from ietf.doc.utils import (augment_events_with_revision, - can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id, investigate_fragment, + can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id, needed_ballot_positions, nice_consensus, update_telechat, has_same_ballot, get_initial_notify, make_notify_changed_event, make_rev_history, default_consensus, add_events_message_info, get_unicode_document_content, @@ -2275,16 +2277,67 @@ def idnits2_state(request, name, rev=None): content_type="text/plain;charset=utf-8", ) + @role_required("Secretariat") def investigate(request): + """Investigate a fragment + + A plain GET with no querystring returns the UI page. + + POST with the task_id field empty starts an async task and returns a JSON response with + the ID needed to monitor the task for results. + + GET with a querystring parameter "id" will poll the status of the async task and return "ready" + or "notready". + + POST with the task_id field set to the id of a "ready" task will return its results or an error + if the task failed or the id is invalid (expired, never exited, etc). + """ results = None + # Start an investigation or retrieve a result on a POST if request.method == "POST": form = InvestigateForm(request.POST) if form.is_valid(): - name_fragment = form.cleaned_data["name_fragment"] - results = investigate_fragment(name_fragment) + task_id = form.cleaned_data["task_id"] + if task_id: + # Ignore the rest of the form and retrieve the result + task_result = AsyncResult(task_id) + if task_result.successful(): + retval = task_result.get() + results = retval["results"] + form.data = form.data.copy() + form.data["name_fragment"] = retval[ + "name_fragment" + ] # ensure consistency + del form.data["task_id"] # do not request the task result again + else: + form.add_error( + None, + "The investigation task failed. Please try again and ask for help if this recurs.", + ) + # Falls through to the render at the end! + else: + name_fragment = form.cleaned_data["name_fragment"] + task_result = investigate_fragment_task.delay(name_fragment) + return JsonResponse({"id": task_result.id}) else: - form = InvestigateForm() + task_id = request.GET.get("id", None) + if task_id is not None: + # Check status if we got the "id" parameter + task_result = AsyncResult(task_id) + return JsonResponse( + {"status": "ready" if task_result.ready() else "notready"} + ) + else: + # Serve up an empty form + form = InvestigateForm() + + # If we get here, it is just a plain GET - serve the UI return render( - request, "doc/investigate.html", context=dict(form=form, results=results) + request, + "doc/investigate.html", + context={ + "form": form, + "results": results, + }, ) diff --git a/ietf/settings.py b/ietf/settings.py index b452864be6..a1dc9ffe21 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -452,6 +452,7 @@ def skip_unreadable_post(record): 'django_vite', 'django_bootstrap5', 'django_celery_beat', + 'django_celery_results', 'corsheaders', 'django_markup', 'oidc_provider', @@ -1226,7 +1227,9 @@ def skip_unreadable_post(record): # https://docs.celeryq.dev/en/stable/userguide/tasks.html#rpc-result-backend-rabbitmq-qpid # Results can be retrieved only once and only by the caller of the task. Results will be # lost if the message broker restarts. -CELERY_RESULT_BACKEND = 'rpc://' # sends a msg via the msg broker +CELERY_RESULT_BACKEND = 'django-cache' # use a Django cache for results +CELERY_CACHE_BACKEND = 'celery-results' # which Django cache to use +CELERY_RESULT_EXPIRES = datetime.timedelta(minutes=5) # how long are results valid? (Default is 1 day) CELERY_TASK_IGNORE_RESULT = True # ignore results unless specifically enabled for a task # Meetecho API setup: Uncomment this and provide real credentials to enable @@ -1309,6 +1312,11 @@ def skip_unreadable_post(record): "MAX_ENTRIES": 5000, }, }, + "celery-results": { + "BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache", + "LOCATION": f"{MEMCACHED_HOST}:{MEMCACHED_PORT}", + "KEY_PREFIX": "ietf:celery", + }, } else: CACHES = { @@ -1347,6 +1355,11 @@ def skip_unreadable_post(record): "MAX_ENTRIES": 5000, }, }, + "celery-results": { + "BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache", + "LOCATION": "app:11211", + "KEY_PREFIX": "ietf:celery", + }, } PUBLISH_IPR_STATES = ['posted', 'removed', 'removed_objfalse'] diff --git a/ietf/static/js/investigate.js b/ietf/static/js/investigate.js new file mode 100644 index 0000000000..b22e099b1e --- /dev/null +++ b/ietf/static/js/investigate.js @@ -0,0 +1,53 @@ +// Copyright The IETF Trust 2025, All Rights Reserved +document.addEventListener('DOMContentLoaded', () => { + const investigateForm = document.forms['investigate'] + investigateForm.addEventListener('submit', (event) => { + // Intercept submission unless we've filled in the task_id field + if (!investigateForm.elements['id_task_id'].value) { + event.preventDefault() + runInvestigation() + } + }) + + const runInvestigation = async () => { + // Submit the request + const response = await fetch('', { + method: investigateForm.method, body: new FormData(investigateForm) + }) + if (!response.ok) { + loadResultsFromTask('bogus-task-id') // bad task id will generate an error from Django + } + const taskId = (await response.json()).id + // Poll for completion of the investigation up to 18*10 = 180 seconds + waitForResults(taskId, 18) + } + + const waitForResults = async (taskId, retries) => { + // indicate that investigation is in progress + document.getElementById('spinner').classList.remove('d-none') + document.getElementById('investigate-button').disabled = true + investigateForm.elements['id_name_fragment'].disabled = true + + const response = await fetch('?' + new URLSearchParams({ id: taskId })) + if (!response.ok) { + loadResultsFromTask('bogus-task-id') // bad task id will generate an error from Django + } + const result = await response.json() + if (result.status !== 'ready' && retries > 0) { + // 10 seconds per retry + setTimeout(waitForResults, 10000, taskId, retries - 1) + } else { + /* Either the response is ready or we timed out waiting. In either case, submit + the task_id via POST and let Django display an error if it's not ready. Before + submitting, re-enable the form fields so the POST is valid. Other in-progress + indicators will be reset when the POST response is loaded. */ + loadResultsFromTask(taskId) + } + } + + const loadResultsFromTask = (taskId) => { + investigateForm.elements['id_name_fragment'].disabled = false + investigateForm.elements['id_task_id'].value = taskId + investigateForm.submit() + } +}) diff --git a/ietf/templates/doc/investigate.html b/ietf/templates/doc/investigate.html index bdcf644406..436a8ce91a 100644 --- a/ietf/templates/doc/investigate.html +++ b/ietf/templates/doc/investigate.html @@ -6,112 +6,122 @@ {% endblock %} {% block content %} - {% origin %} -

Investigate

-
- {% csrf_token %} - {% bootstrap_form form %} - -
- {% if results %} -
- {% if results.can_verify %} -

These can be authenticated

- - - - - - - - - - - {% for path in results.can_verify %} - {% with url=path|url_for_path %} - - - - - - - {% endwith %} - {% endfor %} - -
NameLast Modified OnLinkSource
{{path.name}} - {% if path|mtime_is_epoch %} - Timestamp has been lost (is Unix Epoch) - {% else %} - {{path|mtime|date:"DATETIME_FORMAT"}} - {% endif %} - {{url}}{{path}}
- {% else %} -

Nothing with this name fragment can be authenticated

- {% endif %} -
- {% if results.unverifiable_collections %} -

These are in the archive, but cannot be authenticated

- - - - - - - - - - - {% for path in results.unverifiable_collections %} - {% with url=path|url_for_path %} - - - - - - - {% endwith %} - {% endfor %} - -
NameLast Modified OnLinkSource
{{path.name}} - {% if path|mtime_is_epoch %} - Timestamp has been lost (is Unix Epoch) - {% else %} - {{path|mtime|date:"DATETIME_FORMAT"}} - {% endif %} - {{url}}{{path}}
- {% endif %} - {% if results.unexpected %} -

These are unexpected and we do not know what their origin is. These cannot be authenticated

- - - - - - - - - - {% for path in results.unexpected %} - {% with url=path|url_for_path %} - - - - - - {% endwith %} - {% endfor %} - -
NameLast Modified OnLink
{{path.name}} - {% if path|mtime_is_epoch %} - Timestamp has been lost (is Unix Epoch) - {% else %} - {{path|mtime|date:"DATETIME_FORMAT"}} - {% endif %} - {{url}}
- {% endif %} + {% origin %} +

Investigate

+
+
+ {% csrf_token %} + {% bootstrap_form form %} + +
- {% endif %} + {% if results %} +
+ {% if results.can_verify %} +

These can be authenticated

+ + + + + + + + + + + {% for path in results.can_verify %} + {% with url=path|url_for_path %} + + + + + + + {% endwith %} + {% endfor %} + +
NameLast Modified OnLinkSource
{{ path.name }} + {% if path|mtime_is_epoch %} + Timestamp has been lost (is Unix Epoch) + {% else %} + {{ path|mtime|date:"DATETIME_FORMAT" }} + {% endif %} + {{ url }}{{ path }}
+ {% else %} +

Nothing with this name fragment can be authenticated

+ {% endif %} +
+ {% if results.unverifiable_collections %} +

These are in the archive, but cannot be authenticated

+ + + + + + + + + + + {% for path in results.unverifiable_collections %} + {% with url=path|url_for_path %} + + + + + + + {% endwith %} + {% endfor %} + +
NameLast Modified OnLinkSource
{{ path.name }} + {% if path|mtime_is_epoch %} + Timestamp has been lost (is Unix Epoch) + {% else %} + {{ path|mtime|date:"DATETIME_FORMAT" }} + {% endif %} + {{ url }}{{ path }}
+ {% endif %} + {% if results.unexpected %} +

These are unexpected and we do not know what their origin is. These cannot be authenticated

+ + + + + + + + + + {% for path in results.unexpected %} + {% with url=path|url_for_path %} + + + + + + {% endwith %} + {% endfor %} + +
NameLast Modified OnLink
{{ path.name }} + {% if path|mtime_is_epoch %} + Timestamp has been lost (is Unix Epoch) + {% else %} + {{ path|mtime|date:"DATETIME_FORMAT" }} + {% endif %} + {{ url }}
+ {% endif %} +
+ {% endif %} {% endblock %} {% block js %} + {% endblock %} \ No newline at end of file diff --git a/package.json b/package.json index afcabd13ed..b3d36b349c 100644 --- a/package.json +++ b/package.json @@ -132,6 +132,7 @@ "ietf/static/js/highcharts.js", "ietf/static/js/highstock.js", "ietf/static/js/ietf.js", + "ietf/static/js/investigate.js", "ietf/static/js/ipr-edit.js", "ietf/static/js/ipr-search.js", "ietf/static/js/js-cookie.js", diff --git a/requirements.txt b/requirements.txt index ec5fc60b5f..ae8b06faee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,7 @@ Django>4.2,<5 django-analytical>=3.1.0 django-bootstrap5>=21.3 django-celery-beat>=2.3.0 +django-celery-results>=2.5.1 django-csp>=3.7 django-cors-headers>=3.11.0 django-debug-toolbar>=3.2.4