Skip to content

Commit

Permalink
feat: async investigate_fragment task; celery results backend (#8428)
Browse files Browse the repository at this point in the history
* feat: investigate docs asynchronously

* refactor: move script to its own js file

* fix: adjust polling interval/duration

* test: test new task

* fix: extra tag/fix whitespace

* style: restore whitespace (I hope)

* style: black/standard styling

* test: fix test of investigate view

* test: improve/delint tests
  • Loading branch information
jennifer-richards authored Jan 17, 2025
1 parent df27ba9 commit c848a5a
Show file tree
Hide file tree
Showing 10 changed files with 386 additions and 124 deletions.
1 change: 1 addition & 0 deletions ietf/doc/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ class InvestigateForm(forms.Form):
),
min_length=8,
)
task_id = forms.CharField(required=False, widget=forms.HiddenInput)

def clean_name_fragment(self):
disallowed_characters = ["%", "/", "\\", "*"]
Expand Down
9 changes: 9 additions & 0 deletions ietf/doc/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
generate_idnits2_rfcs_obsoleted,
update_or_create_draft_bibxml_file,
ensure_draft_bibxml_path_exists,
investigate_fragment,
)


Expand Down Expand Up @@ -119,3 +120,11 @@ def generate_draft_bibxml_files_task(days=7, process_all=False):
update_or_create_draft_bibxml_file(event.doc, event.rev)
except Exception as err:
log.log(f"Error generating bibxml for {event.doc.name}-{event.rev}: {err}")


@shared_task(ignore_result=False)
def investigate_fragment_task(name_fragment: str):
return {
"name_fragment": name_fragment,
"results": investigate_fragment(name_fragment),
}
134 changes: 121 additions & 13 deletions ietf/doc/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3280,44 +3280,152 @@ def test_investigate_fragment(self):
"draft-this-should-not-be-possible-00.txt",
)

def test_investigate(self):
def test_investigate_get(self):
"""GET with no querystring should retrieve the investigate UI"""
url = urlreverse("ietf.doc.views_doc.investigate")
login_testing_unauthorized(self, "secretary", url)
r = self.client.get(url)
self.assertEqual(r.status_code, 200)
q = PyQuery(r.content)
self.assertEqual(len(q("form#investigate")), 1)
self.assertEqual(len(q("div#results")), 0)
r = self.client.post(url, dict(name_fragment="this-is-not-found"))

@mock.patch("ietf.doc.views_doc.AsyncResult")
def test_investgate_get_task_id(self, mock_asyncresult):
"""GET with querystring should lookup task status"""
url = urlreverse("ietf.doc.views_doc.investigate")
login_testing_unauthorized(self, "secretary", url)
mock_asyncresult.return_value.ready.return_value = True
r = self.client.get(url + "?id=a-task-id")
self.assertEqual(r.status_code, 200)
self.assertEqual(r.json(), {"status": "ready"})
self.assertTrue(mock_asyncresult.called)
self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))
mock_asyncresult.reset_mock()

mock_asyncresult.return_value.ready.return_value = False
r = self.client.get(url + "?id=a-task-id")
self.assertEqual(r.status_code, 200)
self.assertEqual(r.json(), {"status": "notready"})
self.assertTrue(mock_asyncresult.called)
self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))

@mock.patch("ietf.doc.views_doc.investigate_fragment_task")
def test_investigate_post(self, mock_investigate_fragment_task):
"""POST with a name_fragment and no task_id should start a celery task"""
url = urlreverse("ietf.doc.views_doc.investigate")
login_testing_unauthorized(self, "secretary", url)

# test some invalid cases
r = self.client.post(url, {"name_fragment": "short"}) # limit is >= 8 characters
self.assertEqual(r.status_code, 200)
q = PyQuery(r.content)
self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1)
self.assertFalse(mock_investigate_fragment_task.delay.called)
for char in ["*", "%", "/", "\\"]:
r = self.client.post(url, {"name_fragment": f"bad{char}character"})
self.assertEqual(r.status_code, 200)
q = PyQuery(r.content)
self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1)
self.assertFalse(mock_investigate_fragment_task.delay.called)

# now a valid one
mock_investigate_fragment_task.delay.return_value.id = "a-task-id"
r = self.client.post(url, {"name_fragment": "this-is-a-valid-fragment"})
self.assertEqual(r.status_code, 200)
self.assertTrue(mock_investigate_fragment_task.delay.called)
self.assertEqual(mock_investigate_fragment_task.delay.call_args, mock.call("this-is-a-valid-fragment"))
self.assertEqual(r.json(), {"id": "a-task-id"})

@mock.patch("ietf.doc.views_doc.AsyncResult")
def test_investigate_post_task_id(self, mock_asyncresult):
"""POST with name_fragment and task_id should retrieve results"""
url = urlreverse("ietf.doc.views_doc.investigate")
login_testing_unauthorized(self, "secretary", url)

# First, test a non-successful result - this could be a failure or non-existent task id
mock_result = mock_asyncresult.return_value
mock_result.successful.return_value = False
r = self.client.post(url, {"name_fragment": "some-fragment", "task_id": "a-task-id"})
self.assertContains(r, "The investigation task failed.", status_code=200)
self.assertTrue(mock_asyncresult.called)
self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))
self.assertFalse(mock_result.get.called)
mock_asyncresult.reset_mock()
q = PyQuery(r.content)
self.assertEqual(q("#id_name_fragment").val(), "some-fragment")
self.assertEqual(q("#id_task_id").val(), "a-task-id")

# now the various successful result mixes
mock_result = mock_asyncresult.return_value
mock_result.successful.return_value = True
mock_result.get.return_value = {
"name_fragment": "different-fragment",
"results": {
"can_verify": set(),
"unverifiable_collections": set(),
"unexpected": set(),
}
}
r = self.client.post(url, {"name_fragment": "some-fragment", "task_id": "a-task-id"})
self.assertEqual(r.status_code, 200)
self.assertTrue(mock_asyncresult.called)
self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))
mock_asyncresult.reset_mock()
q = PyQuery(r.content)
self.assertEqual(q("#id_name_fragment").val(), "different-fragment", "name_fragment should be reset")
self.assertEqual(q("#id_task_id").val(), "", "task_id should be cleared")
self.assertEqual(len(q("div#results")), 1)
self.assertEqual(len(q("table#authenticated")), 0)
self.assertEqual(len(q("table#unverifiable")), 0)
self.assertEqual(len(q("table#unexpected")), 0)
r = self.client.post(url, dict(name_fragment="mixed-provenance"))

# This file was created in setUp. It allows the view to render properly
# but its location / content don't matter for this test otherwise.
a_file_that_exists = Path(settings.INTERNET_DRAFT_PATH) / "draft-this-is-active-00.txt"

mock_result.get.return_value = {
"name_fragment": "different-fragment",
"results": {
"can_verify": {a_file_that_exists},
"unverifiable_collections": {a_file_that_exists},
"unexpected": set(),
}
}
r = self.client.post(url, {"name_fragment": "some-fragment", "task_id": "a-task-id"})
self.assertEqual(r.status_code, 200)
self.assertTrue(mock_asyncresult.called)
self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))
mock_asyncresult.reset_mock()
q = PyQuery(r.content)
self.assertEqual(q("#id_name_fragment").val(), "different-fragment", "name_fragment should be reset")
self.assertEqual(q("#id_task_id").val(), "", "task_id should be cleared")
self.assertEqual(len(q("div#results")), 1)
self.assertEqual(len(q("table#authenticated")), 1)
self.assertEqual(len(q("table#unverifiable")), 1)
self.assertEqual(len(q("table#unexpected")), 0)
r = self.client.post(url, dict(name_fragment="not-be-possible"))

mock_result.get.return_value = {
"name_fragment": "different-fragment",
"results": {
"can_verify": set(),
"unverifiable_collections": set(),
"unexpected": {a_file_that_exists},
}
}
r = self.client.post(url, {"name_fragment": "some-fragment", "task_id": "a-task-id"})
self.assertEqual(r.status_code, 200)
self.assertTrue(mock_asyncresult.called)
self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))
mock_asyncresult.reset_mock()
q = PyQuery(r.content)
self.assertEqual(q("#id_name_fragment").val(), "different-fragment", "name_fragment should be reset")
self.assertEqual(q("#id_task_id").val(), "", "task_id should be cleared")
self.assertEqual(len(q("div#results")), 1)
self.assertEqual(len(q("table#authenticated")), 0)
self.assertEqual(len(q("table#unverifiable")), 0)
self.assertEqual(len(q("table#unexpected")), 1)
r = self.client.post(url, dict(name_fragment="short"))
self.assertEqual(r.status_code, 200)
q = PyQuery(r.content)
self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1)
for char in ["*", "%", "/", "\\"]:
r = self.client.post(url, dict(name_fragment=f"bad{char}character"))
self.assertEqual(r.status_code, 200)
q = PyQuery(r.content)
self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1)


class LogIOErrorTests(TestCase):

Expand Down
13 changes: 13 additions & 0 deletions ietf/doc/tests_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
generate_draft_bibxml_files_task,
generate_idnits2_rfcs_obsoleted_task,
generate_idnits2_rfc_status_task,
investigate_fragment_task,
notify_expirations_task,
)

Expand Down Expand Up @@ -98,6 +99,18 @@ def test_expire_last_calls_task(self, mock_get_expired, mock_expire):
self.assertEqual(mock_expire.call_args_list[1], mock.call(docs[1]))
self.assertEqual(mock_expire.call_args_list[2], mock.call(docs[2]))

def test_investigate_fragment_task(self):
investigation_results = object() # singleton
with mock.patch(
"ietf.doc.tasks.investigate_fragment", return_value=investigation_results
) as mock_inv:
retval = investigate_fragment_task("some fragment")
self.assertTrue(mock_inv.called)
self.assertEqual(mock_inv.call_args, mock.call("some fragment"))
self.assertEqual(
retval, {"name_fragment": "some fragment", "results": investigation_results}
)


class Idnits2SupportTests(TestCase):
settings_temp_path_overrides = TestCase.settings_temp_path_overrides + ['DERIVED_DIR']
Expand Down
65 changes: 59 additions & 6 deletions ietf/doc/views_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,11 @@

from pathlib import Path

from celery.result import AsyncResult
from django.core.cache import caches
from django.core.exceptions import PermissionDenied
from django.db.models import Max
from django.http import HttpResponse, Http404, HttpResponseBadRequest
from django.http import HttpResponse, Http404, HttpResponseBadRequest, JsonResponse
from django.shortcuts import render, get_object_or_404, redirect
from django.template.loader import render_to_string
from django.urls import reverse as urlreverse
Expand All @@ -59,8 +60,9 @@
ConsensusDocEvent, NewRevisionDocEvent, TelechatDocEvent, WriteupDocEvent, IanaExpertDocEvent,
IESG_BALLOT_ACTIVE_STATES, STATUSCHANGE_RELATIONS, DocumentActionHolder, DocumentAuthor,
RelatedDocument, RelatedDocHistory)
from ietf.doc.tasks import investigate_fragment_task
from ietf.doc.utils import (augment_events_with_revision,
can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id, investigate_fragment,
can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id,
needed_ballot_positions, nice_consensus, update_telechat, has_same_ballot,
get_initial_notify, make_notify_changed_event, make_rev_history, default_consensus,
add_events_message_info, get_unicode_document_content,
Expand Down Expand Up @@ -2275,16 +2277,67 @@ def idnits2_state(request, name, rev=None):
content_type="text/plain;charset=utf-8",
)


@role_required("Secretariat")
def investigate(request):
"""Investigate a fragment
A plain GET with no querystring returns the UI page.
POST with the task_id field empty starts an async task and returns a JSON response with
the ID needed to monitor the task for results.
GET with a querystring parameter "id" will poll the status of the async task and return "ready"
or "notready".
POST with the task_id field set to the id of a "ready" task will return its results or an error
if the task failed or the id is invalid (expired, never exited, etc).
"""
results = None
# Start an investigation or retrieve a result on a POST
if request.method == "POST":
form = InvestigateForm(request.POST)
if form.is_valid():
name_fragment = form.cleaned_data["name_fragment"]
results = investigate_fragment(name_fragment)
task_id = form.cleaned_data["task_id"]
if task_id:
# Ignore the rest of the form and retrieve the result
task_result = AsyncResult(task_id)
if task_result.successful():
retval = task_result.get()
results = retval["results"]
form.data = form.data.copy()
form.data["name_fragment"] = retval[
"name_fragment"
] # ensure consistency
del form.data["task_id"] # do not request the task result again
else:
form.add_error(
None,
"The investigation task failed. Please try again and ask for help if this recurs.",
)
# Falls through to the render at the end!
else:
name_fragment = form.cleaned_data["name_fragment"]
task_result = investigate_fragment_task.delay(name_fragment)
return JsonResponse({"id": task_result.id})
else:
form = InvestigateForm()
task_id = request.GET.get("id", None)
if task_id is not None:
# Check status if we got the "id" parameter
task_result = AsyncResult(task_id)
return JsonResponse(
{"status": "ready" if task_result.ready() else "notready"}
)
else:
# Serve up an empty form
form = InvestigateForm()

# If we get here, it is just a plain GET - serve the UI
return render(
request, "doc/investigate.html", context=dict(form=form, results=results)
request,
"doc/investigate.html",
context={
"form": form,
"results": results,
},
)
15 changes: 14 additions & 1 deletion ietf/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,7 @@ def skip_unreadable_post(record):
'django_vite',
'django_bootstrap5',
'django_celery_beat',
'django_celery_results',
'corsheaders',
'django_markup',
'oidc_provider',
Expand Down Expand Up @@ -1226,7 +1227,9 @@ def skip_unreadable_post(record):
# https://docs.celeryq.dev/en/stable/userguide/tasks.html#rpc-result-backend-rabbitmq-qpid
# Results can be retrieved only once and only by the caller of the task. Results will be
# lost if the message broker restarts.
CELERY_RESULT_BACKEND = 'rpc://' # sends a msg via the msg broker
CELERY_RESULT_BACKEND = 'django-cache' # use a Django cache for results
CELERY_CACHE_BACKEND = 'celery-results' # which Django cache to use
CELERY_RESULT_EXPIRES = datetime.timedelta(minutes=5) # how long are results valid? (Default is 1 day)
CELERY_TASK_IGNORE_RESULT = True # ignore results unless specifically enabled for a task

# Meetecho API setup: Uncomment this and provide real credentials to enable
Expand Down Expand Up @@ -1309,6 +1312,11 @@ def skip_unreadable_post(record):
"MAX_ENTRIES": 5000,
},
},
"celery-results": {
"BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache",
"LOCATION": f"{MEMCACHED_HOST}:{MEMCACHED_PORT}",
"KEY_PREFIX": "ietf:celery",
},
}
else:
CACHES = {
Expand Down Expand Up @@ -1347,6 +1355,11 @@ def skip_unreadable_post(record):
"MAX_ENTRIES": 5000,
},
},
"celery-results": {
"BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache",
"LOCATION": "app:11211",
"KEY_PREFIX": "ietf:celery",
},
}

PUBLISH_IPR_STATES = ['posted', 'removed', 'removed_objfalse']
Expand Down
Loading

0 comments on commit c848a5a

Please sign in to comment.