From 0107e307eae87f1918f9baf2cb24567ad0838e6b Mon Sep 17 00:00:00 2001
From: Siddharth Venkatesan <siddhave@amazon.com>
Date: Tue, 22 Oct 2024 21:52:36 -0700
Subject: [PATCH] [lmi] add minimal requestId logging for new requests, and
 exceptions/errors (#2472)

---
 engines/python/setup/djl_python/input_parser.py   |  8 +++++++-
 engines/python/setup/djl_python/request.py        |  8 ++++++++
 engines/python/setup/djl_python/request_io.py     |  1 +
 .../djl_python/rolling_batch/rolling_batch.py     | 15 +++++++++++----
 engines/python/setup/djl_python/utils.py          |  2 ++
 5 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/engines/python/setup/djl_python/input_parser.py b/engines/python/setup/djl_python/input_parser.py
index 31c46b001..d0d997b8f 100644
--- a/engines/python/setup/djl_python/input_parser.py
+++ b/engines/python/setup/djl_python/input_parser.py
@@ -78,6 +78,7 @@ def parse_input_with_formatter(inputs: Input, **kwargs) -> ParsedInput:
     input_formatter_function = configs.input_formatter if configs.input_formatter else format_input
     for i in range(start_batch_id, len(batch)):
         input_item = batch[i]
+        client_request_id = input_item.get_property("requestId")
         try:
             # input formatter can be user written as well. We look for model.py and search for the decorator.
             request_input = input_formatter_function(input_item, **kwargs)
@@ -85,6 +86,7 @@ def parse_input_with_formatter(inputs: Input, **kwargs) -> ParsedInput:
             # populate additional information in request_input
             request_id = req_id_counter.next_id() if req_id_counter else i
             request_input.request_id = request_id
+            request_input.client_request_id = client_request_id
             request_input.tokenizer = kwargs.get("tokenizer")
             request_input.tgi_compat = configs.tgi_compat
 
@@ -92,6 +94,9 @@ def parse_input_with_formatter(inputs: Input, **kwargs) -> ParsedInput:
             add_server_maintained_params(request_input, input_item, **kwargs)
             request = Request(request_input=request_input)
             requests.append(request)
+            logging.info(
+                f"[RequestId={client_request_id}] parsed and scheduled for inference"
+            )
         except Exception as e:  # pylint: disable=broad-except
             err_msg = "Input Parsing failed. Ensure that the request payload is valid. "
             # str(e) for KeyError only yields the name of the key, which isn't useful as a response to the client
@@ -100,7 +105,8 @@ def parse_input_with_formatter(inputs: Input, **kwargs) -> ParsedInput:
             else:
                 err_msg += str(e)
             errors[i] = err_msg
-            logging.warning(err_msg, exc_info=True)
+            logging.warning(f"[RequestId={client_request_id}] " + err_msg,
+                            exc_info=True)
             continue
 
     return ParsedInput(errors=errors, requests=requests, batch=batch)
diff --git a/engines/python/setup/djl_python/request.py b/engines/python/setup/djl_python/request.py
index 53a08d2cc..30396d24c 100644
--- a/engines/python/setup/djl_python/request.py
+++ b/engines/python/setup/djl_python/request.py
@@ -173,3 +173,11 @@ def set_error_code(self, code: int):
         Sets the HTTP Status code to return when inference fails
         """
         self.error_code = code
+
+    def get_client_request_id(self) -> str:
+        """
+        Returns the requestId specified in the HTTP request
+
+        :return: the requestId specified in the HTTP request
+        """
+        return self.request_input.client_request_id
diff --git a/engines/python/setup/djl_python/request_io.py b/engines/python/setup/djl_python/request_io.py
index 9d3932a2f..20c85fb33 100644
--- a/engines/python/setup/djl_python/request_io.py
+++ b/engines/python/setup/djl_python/request_io.py
@@ -144,6 +144,7 @@ class RequestInput:
         server_parameters: parameters that are modified by the built-in handlers to support backend engines.
     """
     request_id: int = None
+    client_request_id: str = None
     output_formatter: Union[Callable, str] = None
     parameters: Dict = field(default_factory=lambda: {})
     server_parameters: Dict = field(default_factory=lambda: {})
diff --git a/engines/python/setup/djl_python/rolling_batch/rolling_batch.py b/engines/python/setup/djl_python/rolling_batch/rolling_batch.py
index 7b0716ee4..d2ee89019 100644
--- a/engines/python/setup/djl_python/rolling_batch/rolling_batch.py
+++ b/engines/python/setup/djl_python/rolling_batch/rolling_batch.py
@@ -47,17 +47,23 @@ def try_catch_handling(self, *args, **kwargs):
         try:
             return func(self, *args, **kwargs)
         except Exception as e:
-            logging.exception("Rolling batch inference error")
+            logging.exception(
+                f"Rolling batch inference error. There are {len(self.active_requests)} requests impacted. Dumping the impacted requestIds"
+            )
             for request in self.active_requests:
+                logging.info(
+                    f"[RequestId={request.get_client_request_id()}] impacted by rolling batch error"
+                )
+                error_message = "exception occurred during rolling batch inference"
                 token = Token(-1,
                               "",
                               log_prob=-1,
                               special_token=True,
-                              error_msg=str(e))
+                              error_msg=error_message)
                 request.set_next_token(token,
                                        last_token=True,
                                        finish_reason="error")
-                request.set_error_message(str(e))
+                request.set_error_message(error_message)
                 # TODO: make configurable
                 request.set_error_code(424)
             response = self.postprocess_results()
@@ -143,7 +149,8 @@ def postprocess_results(self) -> List[dict]:
             res = {
                 "data": req.get_next_token(),
                 "last": req.is_last_token(),
-                "content_type": req.get_content_type()
+                "content_type": req.get_content_type(),
+                "request_id": req.get_client_request_id(),
             }
             if req.get_error_message():
                 res["error"] = req.get_error_message()
diff --git a/engines/python/setup/djl_python/utils.py b/engines/python/setup/djl_python/utils.py
index a1576cecc..a8c79183a 100644
--- a/engines/python/setup/djl_python/utils.py
+++ b/engines/python/setup/djl_python/utils.py
@@ -119,11 +119,13 @@ def rolling_batch_inference(parsed_input, inputs: Input, outputs: Output,
             outputs.add_property(f"batch_{i}_Content-Type", "application/json")
         else:
             content_type = result[idx].get("content_type")
+            client_request_id = result[idx].get("request_id")
             outputs.add(Output.binary_encode(result[idx]),
                         key="data",
                         batch_index=i)
             if content_type is not None:
                 outputs.add_property(f"batch_{i}_Content-Type", content_type)
+            outputs.add_property(f"batch_{i}_requestId", client_request_id)
             idx += 1
     return outputs