From 16b5cf36f787232c67242fab69f82dd502c5e3ad Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 29 Jan 2025 09:13:08 -0500 Subject: [PATCH] Add conditions in rest_client and base client implementation to drill down into response code and further group errors as ones that should be retried and ones that are hard failures --- client/src/nv_ingest_client/client/client.py | 14 +++++++++++++- .../message_clients/rest/rest_client.py | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/client/src/nv_ingest_client/client/client.py b/client/src/nv_ingest_client/client/client.py index 0b2f3017..56ead1f7 100644 --- a/client/src/nv_ingest_client/client/client.py +++ b/client/src/nv_ingest_client/client/client.py @@ -321,7 +321,19 @@ def _fetch_job_result(self, job_index: str, timeout: float = 100, data_only: boo # Only pop once we know we've successfully decoded the response or errored out _ = self._pop_job_state(job_index) else: - raise TimeoutError(f"Timeout: No response within {timeout} seconds for job ID {job_index}") + # There are a plethora of codes that can be thrown. Some offer specific insights while + # others can be grouped into a general failure category. We check for specific codes here + # and then generally error on the others. + if response.response_code == 404: + # job_id not found on serverside. This condition will not alleviate itself with a retry + raise RuntimeError(f"JobId: {job_state.job_id} not found - Reason: {response.response_reason}") + elif response.response_code == 500: + # properly propagated server side error + raise RuntimeError(f"Response: {response.response} - Reason: {response.response_reason}") + else: + # Generalized errors group. These errors are ones that could potentially be resolved and + # therefore should be retried. Existing logic works based on TimeoutErrors so we raise here. + raise TimeoutError(f"Response: {response.response} - Reason: {response.response_reason}") except TimeoutError: raise diff --git a/client/src/nv_ingest_client/message_clients/rest/rest_client.py b/client/src/nv_ingest_client/message_clients/rest/rest_client.py index c65952f2..e2001498 100644 --- a/client/src/nv_ingest_client/message_clients/rest/rest_client.py +++ b/client/src/nv_ingest_client/message_clients/rest/rest_client.py @@ -209,7 +209,7 @@ def fetch_message(self, job_id: str, timeout: float = 10) -> ResponseSchema: if response_code in _TERMINAL_RESPONSE_STATUSES: # Terminal response code; return error ResponseSchema return ResponseSchema( - response_code=1, + response_code=response_code, response_reason=( f"Terminal response code {response_code} received when fetching JobSpec: {job_id}" ),