From 0107e307eae87f1918f9baf2cb24567ad0838e6b Mon Sep 17 00:00:00 2001
From: Siddharth Venkatesan
Date: Tue, 22 Oct 2024 21:52:36 -0700
Subject: [PATCH] [lmi] add minimal requestId logging for new requests, and exceptions/errors (#2472)

---
Reviewer note: the warning emitted when input parsing fails now closes the
"[RequestId=<id>] " prefix (bracket plus trailing space) so it matches the
other requestId log lines added by this patch. No line counts changed.

 engines/python/setup/djl_python/input_parser.py |  8 +++++++-
 engines/python/setup/djl_python/request.py      |  8 ++++++++
 engines/python/setup/djl_python/request_io.py   |  1 +
 .../djl_python/rolling_batch/rolling_batch.py   | 15 +++++++++++----
 engines/python/setup/djl_python/utils.py        |  2 ++
 5 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/engines/python/setup/djl_python/input_parser.py b/engines/python/setup/djl_python/input_parser.py
index 31c46b001..d0d997b8f 100644
--- a/engines/python/setup/djl_python/input_parser.py
+++ b/engines/python/setup/djl_python/input_parser.py
@@ -78,6 +78,7 @@ def parse_input_with_formatter(inputs: Input, **kwargs) -> ParsedInput:
     input_formatter_function = configs.input_formatter if configs.input_formatter else format_input
     for i in range(start_batch_id, len(batch)):
         input_item = batch[i]
+        client_request_id = input_item.get_property("requestId")
         try:
             # input formatter can be user written as well. We look for model.py and search for the decorator.
             request_input = input_formatter_function(input_item, **kwargs)
@@ -85,6 +86,7 @@ def parse_input_with_formatter(inputs: Input, **kwargs) -> ParsedInput:
             # populate additional information in request_input
             request_id = req_id_counter.next_id() if req_id_counter else i
             request_input.request_id = request_id
+            request_input.client_request_id = client_request_id
             request_input.tokenizer = kwargs.get("tokenizer")
             request_input.tgi_compat = configs.tgi_compat
 
@@ -92,6 +94,9 @@ def parse_input_with_formatter(inputs: Input, **kwargs) -> ParsedInput:
             add_server_maintained_params(request_input, input_item, **kwargs)
             request = Request(request_input=request_input)
             requests.append(request)
+            logging.info(
+                f"[RequestId={client_request_id}] parsed and scheduled for inference"
+            )
         except Exception as e:  # pylint: disable=broad-except
             err_msg = "Input Parsing failed. Ensure that the request payload is valid. "
             # str(e) for KeyError only yields the name of the key, which isn't useful as a response to the client
@@ -100,7 +105,8 @@ def parse_input_with_formatter(inputs: Input, **kwargs) -> ParsedInput:
             else:
                 err_msg += str(e)
             errors[i] = err_msg
-            logging.warning(err_msg, exc_info=True)
+            logging.warning(f"[RequestId={client_request_id}] " + err_msg,
+                            exc_info=True)
             continue
 
     return ParsedInput(errors=errors, requests=requests, batch=batch)
diff --git a/engines/python/setup/djl_python/request.py b/engines/python/setup/djl_python/request.py
index 53a08d2cc..30396d24c 100644
--- a/engines/python/setup/djl_python/request.py
+++ b/engines/python/setup/djl_python/request.py
@@ -173,3 +173,11 @@ def set_error_code(self, code: int):
         Sets the HTTP Status code to return when inference fails
         """
         self.error_code = code
+
+    def get_client_request_id(self) -> str:
+        """
+        Returns the requestId specified in the HTTP request
+
+        :return: the requestId specified in the HTTP request
+        """
+        return self.request_input.client_request_id
diff --git a/engines/python/setup/djl_python/request_io.py b/engines/python/setup/djl_python/request_io.py
index 9d3932a2f..20c85fb33 100644
--- a/engines/python/setup/djl_python/request_io.py
+++ b/engines/python/setup/djl_python/request_io.py
@@ -144,6 +144,7 @@ class RequestInput:
         server_parameters: parameters that are modified by the built-in handlers to support backend engines.
     """
     request_id: int = None
+    client_request_id: str = None
     output_formatter: Union[Callable, str] = None
     parameters: Dict = field(default_factory=lambda: {})
     server_parameters: Dict = field(default_factory=lambda: {})
diff --git a/engines/python/setup/djl_python/rolling_batch/rolling_batch.py b/engines/python/setup/djl_python/rolling_batch/rolling_batch.py
index 7b0716ee4..d2ee89019 100644
--- a/engines/python/setup/djl_python/rolling_batch/rolling_batch.py
+++ b/engines/python/setup/djl_python/rolling_batch/rolling_batch.py
@@ -47,17 +47,23 @@ def try_catch_handling(self, *args, **kwargs):
         try:
             return func(self, *args, **kwargs)
         except Exception as e:
-            logging.exception("Rolling batch inference error")
+            logging.exception(
+                f"Rolling batch inference error. There are {len(self.active_requests)} requests impacted. Dumping the impacted requestIds"
+            )
             for request in self.active_requests:
+                logging.info(
+                    f"[RequestId={request.get_client_request_id()}] impacted by rolling batch error"
+                )
+                error_message = "exception occurred during rolling batch inference"
                 token = Token(-1,
                               "",
                               log_prob=-1,
                               special_token=True,
-                              error_msg=str(e))
+                              error_msg=error_message)
                 request.set_next_token(token,
                                        last_token=True,
                                        finish_reason="error")
-                request.set_error_message(str(e))
+                request.set_error_message(error_message)
                 # TODO: make configurable
                 request.set_error_code(424)
             response = self.postprocess_results()
@@ -143,7 +149,8 @@ def postprocess_results(self) -> List[dict]:
             res = {
                 "data": req.get_next_token(),
                 "last": req.is_last_token(),
-                "content_type": req.get_content_type()
+                "content_type": req.get_content_type(),
+                "request_id": req.get_client_request_id(),
             }
             if req.get_error_message():
                 res["error"] = req.get_error_message()
diff --git a/engines/python/setup/djl_python/utils.py b/engines/python/setup/djl_python/utils.py
index a1576cecc..a8c79183a 100644
--- a/engines/python/setup/djl_python/utils.py
+++ b/engines/python/setup/djl_python/utils.py
@@ -119,11 +119,13 @@ def rolling_batch_inference(parsed_input, inputs: Input, outputs: Output,
             outputs.add_property(f"batch_{i}_Content-Type", "application/json")
         else:
             content_type = result[idx].get("content_type")
+            client_request_id = result[idx].get("request_id")
             outputs.add(Output.binary_encode(result[idx]),
                         key="data",
                         batch_index=i)
             if content_type is not None:
                 outputs.add_property(f"batch_{i}_Content-Type", content_type)
+            outputs.add_property(f"batch_{i}_requestId", client_request_id)
             idx += 1
 
     return outputs