ansible · manstis · Jun 21, 2024 · Jun 21, 2024 · Jun 21, 2024 · Jun 21, 2024
diff --git a/ansible_ai_connect/ai/api/api_wrapper.py b/ansible_ai_connect/ai/api/api_wrapper.py
@@ -0,0 +1,154 @@
+#  Copyright Red Hat
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import functools
+import json
+import logging
+from typing import Callable
+
+from django.conf import settings
+from rest_framework.exceptions import ValidationError
+
+from ansible_ai_connect.ai.api.exceptions import (
+    FeatureNotAvailable,
+    ModelTimeoutException,
+    ServiceUnavailable,
+    WcaBadRequestException,
+    WcaCloudflareRejectionException,
+    WcaEmptyResponseException,
+    WcaInvalidModelIdException,
+    WcaKeyNotFoundException,
+    WcaModelIdNotFoundException,
+    WcaNoDefaultModelIdException,
+    WcaSuggestionIdCorrelationFailureException,
+    WcaUserTrialExpiredException,
+)
+from ansible_ai_connect.ai.api.model_client.exceptions import (
+    ModelTimeoutError,
+    WcaBadRequest,
+    WcaCloudflareRejection,
+    WcaEmptyResponse,
+    WcaInvalidModelId,
+    WcaKeyNotFound,
+    WcaModelIdNotFound,
+    WcaNoDefaultModelId,
+    WcaSuggestionIdCorrelationFailure,
+    WcaUserTrialExpired,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def call(api_type: str, identifier_provider: Callable[[], str]):
+    """Build a decorator that translates model-client errors into API exceptions.
+
+    The decorated callable is invoked with its arguments unchanged and its
+    return value is passed through. Any exception it raises is logged together
+    with ``api_type`` and the identifier, then either re-raised as the matching
+    API exception (carrying the original as ``cause``) or, for
+    ``ValidationError`` and ``FeatureNotAvailable``, re-raised as-is.
+
+    :param api_type: Label of the API being served; only used in log messages.
+    :param identifier_provider: Zero-argument callable returning the identifier
+        of the request being processed; only used in log messages.
+    :return: A decorator for the function that performs the model call.
+    :raises ServiceUnavailable: For any exception without a dedicated handler.
+    """
+
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            # Every handler below interpolates ``identifier``, so bind a
+            # placeholder up front: if identifier_provider() itself raises, the
+            # handlers must not fail with UnboundLocalError and mask the cause.
+            identifier = "<unknown>"
+            try:
+                identifier = identifier_provider()
+                return func(*args, **kwargs)
+            except ModelTimeoutError as e:
+                logger.warning(
+                    f"model timed out after {settings.ANSIBLE_AI_MODEL_MESH_API_TIMEOUT} "
+                    f"seconds (per task) for {api_type}: {identifier}"
+                )
+                raise ModelTimeoutException(cause=e) from e
+
+            except WcaBadRequest as e:
+                logger.error(
+                    f"bad request from WCA for completion for {api_type}: {identifier}:"
+                    f" {json.dumps(e.json_response)}"
+                )
+                raise WcaBadRequestException(cause=e) from e
+
+            except WcaInvalidModelId as e:
+                logger.info(f"WCA Model ID is invalid for {api_type}: {identifier}")
+                raise WcaInvalidModelIdException(cause=e) from e
+
+            except WcaKeyNotFound as e:
+                logger.info(
+                    f"A WCA Api Key was expected but not found for {api_type}: {identifier}"
+                )
+                raise WcaKeyNotFoundException(cause=e) from e
+
+            except WcaNoDefaultModelId as e:
+                logger.info(f"No default WCA Model ID was found for {api_type}: {identifier}")
+                raise WcaNoDefaultModelIdException(cause=e) from e
+
+            except WcaModelIdNotFound as e:
+                logger.info(
+                    f"A WCA Model ID was expected but not found for {api_type}: {identifier}"
+                )
+                raise WcaModelIdNotFoundException(cause=e) from e
+
+            except WcaSuggestionIdCorrelationFailure as e:
+                logger.info(
+                    f"WCA Request/Response SuggestionId correlation failed for "
+                    f"{api_type}: {identifier} and x_request_id: {e.x_request_id}"
+                )
+                raise WcaSuggestionIdCorrelationFailureException(cause=e) from e
+
+            except WcaEmptyResponse as e:
+                logger.info(f"WCA returned an empty response for suggestion {identifier}")
+                raise WcaEmptyResponseException(cause=e) from e
+
+            except WcaCloudflareRejection as e:
+                logger.exception(f"Cloudflare rejected the request for {api_type}: {identifier}")
+                raise WcaCloudflareRejectionException(cause=e) from e
+
+            except WcaUserTrialExpired as e:
+                logger.exception(f"User trial expired, when requesting {api_type}: {identifier}")
+                raise WcaUserTrialExpiredException(cause=e) from e
+
+            except ValidationError as e:
+                logger.exception(
+                    f"An exception {e.__class__} occurred "
+                    f"during validation of {api_type}: {identifier}"
+                )
+                raise
+
+            except FeatureNotAvailable:
+                logger.exception(
+                    f"The requested feature is unavailable for {api_type}: {identifier}"
+                )
+                raise
+
+            except Exception as e:
+                logger.exception(
+                    f"An unhandled exception {e.__class__} occurred "
+                    f"during processing of {api_type}: {identifier}"
+                )
+                raise ServiceUnavailable(cause=e) from e
+
+        return wrapper
+
+    return decorator
diff --git a/ansible_ai_connect/ai/api/model_client/tests/test_wca_client.py b/ansible_ai_connect/ai/api/model_client/tests/test_wca_client.py
@@ -260,6 +260,7 @@ def setUp(self):
         wca_client.session = Mock()
         response = Mock
         response.text = '{"playbook": "Oh!", "outline": "Ahh!", "explanation": "!Óh¡"}'
+        response.status_code = 200
         response.raise_for_status = Mock()
         wca_client.session.post.return_value = response
         self.wca_client = wca_client

diff --git a/ansible_ai_connect/ai/api/model_client/wca_client.py b/ansible_ai_connect/ai/api/model_client/wca_client.py
@@ -491,7 +491,10 @@ def generate_playbook(
             headers=headers,
             json=data,
         )
+        context = Context(model_id, result, False)
+        InferenceResponseChecks().run_checks(context)
         result.raise_for_status()
+
         response = json.loads(result.text)
 
         playbook = response["playbook"]
@@ -517,7 +520,10 @@ def explain_playbook(self, request, content: str) -> str:
             headers=headers,
             json=data,
         )
+        context = Context(model_id, result, False)
+        InferenceResponseChecks().run_checks(context)
         result.raise_for_status()
+
         response = json.loads(result.text)
         return response["explanation"]
 

diff --git a/ansible_ai_connect/ai/api/pipelines/completion_stages/inference2.py b/ansible_ai_connect/ai/api/pipelines/completion_stages/inference2.py
@@ -0,0 +1,155 @@
+#  Copyright Red Hat
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import logging
+import time
+from string import Template
+
+from ansible_anonymizer import anonymizer
+from django.apps import apps
+from django_prometheus.conf import NAMESPACE
+from prometheus_client import Histogram
+
+from ansible_ai_connect.ai.api.api_wrapper import call
+from ansible_ai_connect.ai.api.data.data_model import ModelMeshPayload
+from ansible_ai_connect.ai.api.exceptions import (
+    BaseWisdomAPIException,
+    WcaUserTrialExpiredException,
+    process_error_count,
+)
+from ansible_ai_connect.ai.api.model_client.exceptions import WcaUserTrialExpired
+from ansible_ai_connect.ai.api.pipelines.common import PipelineElement
+from ansible_ai_connect.ai.api.pipelines.completion_context import CompletionContext
+from ansible_ai_connect.ai.api.utils.segment import send_segment_event
+from ansible_ai_connect.ai.feature_flags import FeatureFlags
+
+logger = logging.getLogger(__name__)
+
+feature_flags = FeatureFlags()
+
+completions_hist = Histogram(
+    "model_prediction_latency_seconds",
+    "Histogram of model prediction processing time",
+    namespace=NAMESPACE,
+)
+
+
+class InferenceStage2(PipelineElement):
+    """Completion pipeline stage that requests predictions from the model client."""
+
+    def process(self, context: CompletionContext) -> None:
+        """Run inference for ``context.payload`` and store the outcome on ``context``.
+
+        The model call is wrapped with ``call`` from ``api_wrapper``; see that
+        decorator for how exceptions raised here are logged and translated.
+
+        :param context: Completion context; ``model_id``, ``predictions`` and
+            ``anonymized_predictions`` are assigned on it, also when inference fails.
+        """
+
+        payload = context.payload
+        suggestion_id = payload.suggestionId
+
+        @call("suggestions", lambda: str(suggestion_id))
+        def get_predictions() -> None:
+            """Call the model client, record latency and send the Segment event."""
+            request = context.request
+            model_mesh_client = apps.get_app_config("ai").model_mesh_client
+            # We have a little inconsistency of the "model" term throughout the application:
+            # - FeatureFlags use 'model_name'
+            # - ModelMeshClient uses 'model_id'
+            # - Public completion API uses 'model'
+            # - Segment Events use 'modelName'
+            model_id = payload.model
+
+            model_mesh_payload = ModelMeshPayload(
+                instances=[
+                    {
+                        "prompt": payload.prompt,
+                        "context": payload.context,
+                        "suggestionId": str(suggestion_id),
+                    }
+                ]
+            )
+            data = model_mesh_payload.dict()
+            logger.debug(f"input to inference for suggestion id {suggestion_id}:\n{data}")
+
+            predictions = None
+            exception = None
+            event = None
+            event_name = None
+            start_time = time.time()
+
+            try:
+                predictions = model_mesh_client.infer(
+                    request, data, model_id=model_id, suggestion_id=suggestion_id
+                )
+                model_id = predictions.get("model_id", model_id)
+
+            except (WcaUserTrialExpired, WcaUserTrialExpiredException) as e:
+                # The ``call`` wrapper only converts WcaUserTrialExpired into
+                # WcaUserTrialExpiredException after this function has exited, so
+                # the client-level exception has to be caught here as well for the
+                # "trialExpired" event to be sent.
+                exception = e
+                event = {
+                    "type": "prediction",
+                    "modelName": model_id,
+                    "suggestionId": str(suggestion_id),
+                }
+                event_name = "trialExpired"
+                raise
+
+            except Exception as e:
+                exception = e
+                raise
+            finally:
+                duration = round((time.time() - start_time) * 1000, 2)
+                completions_hist.observe(duration / 1000)  # millisec back to seconds
+                anonymized_predictions = anonymizer.anonymize_struct(
+                    predictions, value_template=Template("{{ _${variable_name}_ }}")
+                )
+                # If an exception was thrown during the backend call, try to get the model ID
+                # that is contained in the exception.
+                if exception:
+                    process_error_count.labels(stage="prediction").inc()
+                    model_id_in_exception = BaseWisdomAPIException.get_model_id_from_exception(
+                        exception
+                    )
+                    if model_id_in_exception:
+                        model_id = model_id_in_exception
+                if event:
+                    event["modelName"] = model_id
+                else:
+                    event = {
+                        "duration": duration,
+                        "exception": exception is not None,
+                        "modelName": model_id,
+                        "problem": None if exception is None else exception.__class__.__name__,
+                        "request": data,
+                        "response": anonymized_predictions,
+                        "suggestionId": str(suggestion_id),
+                    }
+                event_name = event_name if event_name else "prediction"
+                send_segment_event(event, event_name, request.user)
+
+                logger.debug(
+                    f"response from inference for suggestion id {suggestion_id}:\n{predictions}"
+                )
+
+                context.model_id = model_id
+                context.predictions = predictions
+                context.anonymized_predictions = anonymized_predictions
+
+        get_predictions()
diff --git a/ansible_ai_connect/ai/api/pipelines/completions.py b/ansible_ai_connect/ai/api/pipelines/completions.py
@@ -22,8 +22,8 @@
 from ansible_ai_connect.ai.api.pipelines.completion_stages.deserialise import (
     DeserializeStage,
 )
-from ansible_ai_connect.ai.api.pipelines.completion_stages.inference import (
-    InferenceStage,
+from ansible_ai_connect.ai.api.pipelines.completion_stages.inference2 import (
+    InferenceStage2,
 )
 from ansible_ai_connect.ai.api.pipelines.completion_stages.post_process import (
     PostProcessStage,
@@ -45,7 +45,7 @@ def __init__(self, request: Request):
             [
                 DeserializeStage(),
                 PreProcessStage(),
-                InferenceStage(),
+                InferenceStage2(),
                 PostProcessStage(),
                 ResponseStage(),
             ],

diff --git a/ansible_ai_connect/ai/api/tests/test_views.py b/ansible_ai_connect/ai/api/tests/test_views.py
@@ -680,8 +680,8 @@ def test_wca_completion_request_id_correlation_failure(self):
                     properties = event["properties"]
                     self.assertTrue(properties["exception"])
                     self.assertEqual(properties["problem"], "WcaSuggestionIdCorrelationFailure")
-            self.assertInLog(f"suggestion_id: '{DEFAULT_SUGGESTION_ID}'", log)
-            self.assertInLog(f"x_request_id: '{x_request_id}'", log)
+            self.assertInLog(f"suggestions: {DEFAULT_SUGGESTION_ID}", log)
+            self.assertInLog(f"x_request_id: {x_request_id}", log)
 
     @override_settings(WCA_SECRET_DUMMY_SECRETS="1:valid")
     @override_settings(SEGMENT_WRITE_KEY="DUMMY_KEY_VALUE")
@@ -2187,7 +2187,7 @@ def setUp(self):
         self.model_client.get_api_key = Mock(return_value="org-api-key")
 
     @override_settings(SEGMENT_WRITE_KEY="DUMMY_KEY_VALUE")
-    @patch("ansible_ai_connect.ai.api.views.send_segment_event")
+    @patch("ansible_ai_connect.ai.api.views2.send_segment_event")
     def test_wca_contentmatch_segment_events_with_seated_user(self, mock_send_segment_event):
         self.user.rh_user_has_seat = True
         self.model_client.get_model_id = Mock(return_value="model-id")
@@ -2234,7 +2234,7 @@ def test_wca_contentmatch_segment_events_with_seated_user(self, mock_send_segmen
         self.assertTrue(event_request.items() <= actual_event.get("request").items())
 
     @override_settings(SEGMENT_WRITE_KEY="DUMMY_KEY_VALUE")
-    @patch("ansible_ai_connect.ai.api.views.send_segment_event")
+    @patch("ansible_ai_connect.ai.api.views2.send_segment_event")
     def test_wca_contentmatch_segment_events_with_invalid_modelid_error(
         self, mock_send_segment_event
     ):
@@ -2276,7 +2276,7 @@ def test_wca_contentmatch_segment_events_with_invalid_modelid_error(
         self.assertTrue(event_request.items() <= actual_event.get("request").items())
 
     @override_settings(SEGMENT_WRITE_KEY="DUMMY_KEY_VALUE")
-    @patch("ansible_ai_connect.ai.api.views.send_segment_event")
+    @patch("ansible_ai_connect.ai.api.views2.send_segment_event")
     def test_wca_contentmatch_segment_events_with_empty_response_error(
         self, mock_send_segment_event
     ):
@@ -2321,7 +2321,7 @@ def test_wca_contentmatch_segment_events_with_empty_response_error(
         self.assertTrue(event_request.items() <= actual_event.get("request").items())
 
     @override_settings(SEGMENT_WRITE_KEY="DUMMY_KEY_VALUE")
-    @patch("ansible_ai_connect.ai.api.views.send_segment_event")
+    @patch("ansible_ai_connect.ai.api.views2.send_segment_event")
     def test_wca_contentmatch_segment_events_with_key_error(self, mock_send_segment_event):
         self.user.rh_user_has_seat = True
         self.model_client.get_api_key = Mock(side_effect=WcaKeyNotFound)
@@ -2522,9 +2522,8 @@ def test_service_unavailable(self, invoke):
         }
 
         self.client.force_authenticate(user=self.user)
-        with self.assertRaises(Exception):
-            r = self.client.post(reverse("explanations"), payload, format="json")
-            self.assertEqual(r.status_code, HTTPStatus.SERVICE_UNAVAILABLE)
+        r = self.client.post(reverse("explanations"), payload, format="json")
+        self.assertEqual(r.status_code, HTTPStatus.SERVICE_UNAVAILABLE)
 
 
 @override_settings(ANSIBLE_AI_MODEL_MESH_API_TYPE="dummy")

diff --git a/ansible_ai_connect/ai/api/urls.py b/ansible_ai_connect/ai/api/urls.py
@@ -14,7 +14,10 @@
 
 from django.urls import path
 
-from .views import Completions, ContentMatches, Explanation, Feedback, Generation
+from .views import Completions, Feedback
+from .views2 import ContentMatches
+from .views3 import Generation
+from .views4 import Explanation
 
 urlpatterns = [
     path("completions/", Completions.as_view(), name="completions"),