Skip to content

Commit

Permalink
Opik 640 sdk improve ux in evaluate function when provider rate limit exceeded (#923)

Browse files Browse the repository at this point in the history

* Add rate limit message to metrics calculation step in evaluate flow

* Add exception_analyzer.is_llm_provider_rate_limit_error check to user-defined task object

* Fix lint errors

* Suppress debug info for litellm

* Fix lint errors

* Small refactor in exception_analyzer

* Fix lint errors
  • Loading branch information
alexkuzmik authored Dec 19, 2024
1 parent 6070d73 commit ad165a6
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 5 deletions.
15 changes: 15 additions & 0 deletions sdks/python/src/opik/evaluation/exception_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import openai
import litellm.exceptions


def is_llm_provider_rate_limit_error(exception: Exception) -> bool:
    """Return True if ``exception`` looks like an LLM provider rate-limit error.

    An exception qualifies when it is one of the known rate-limit exception
    classes raised by supported providers, or when it carries an HTTP
    ``status_code`` attribute equal to 429 (Too Many Requests).
    """
    known_rate_limit_errors = (
        openai.RateLimitError,
        litellm.exceptions.RateLimitError,
    )

    if isinstance(exception, known_rate_limit_errors):
        return True

    # Fall back to duck-typing: many HTTP client exceptions expose the
    # response status code directly on the exception object.
    return getattr(exception, "status_code", None) == 429
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from opik import semantic_version

LOGGER = logging.getLogger(__name__)
litellm.suppress_debug_info = True # to disable colorized prints with links to litellm whenever an LLM provider raises an error


class LiteLLMChatModel(base_model.OpikBaseModel):
Expand Down
23 changes: 18 additions & 5 deletions sdks/python/src/opik/evaluation/scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@

import tqdm

from opik import context_storage, exceptions, opik_context, track
from opik import context_storage, exceptions, opik_context, track, logging_messages
from opik.api_objects import opik_client, trace
from opik.api_objects.dataset import dataset, dataset_item
from opik.api_objects.experiment import experiment, experiment_item

from opik.decorator import error_info_collector
from opik.types import ErrorInfoDict
from . import test_case, test_result
from . import test_case, test_result, exception_analyzer
from .metrics import arguments_helpers, base_metric, score_result
from .types import LLMTask

Expand Down Expand Up @@ -42,17 +43,25 @@ def _score_test_case(
score_results.append(result)
except exceptions.ScoreMethodMissingArguments:
raise
except Exception as e:
except Exception as exception:
# This can be problematic if the metric returns a list of strings as we will not know the name of the metrics that have failed
LOGGER.error(
"Failed to compute metric %s. Score result will be marked as failed.",
metric.name,
exc_info=True,
)

if exception_analyzer.is_llm_provider_rate_limit_error(exception):
LOGGER.error(
logging_messages.LLM_PROVIDER_RATE_LIMIT_ERROR_DETECTED_IN_EVALUATE_FUNCTION
)

score_results.append(
score_result.ScoreResult(
name=metric.name, value=0.0, reason=str(e), scoring_failed=True
name=metric.name,
value=0.0,
reason=str(exception),
scoring_failed=True,
)
)

Expand Down Expand Up @@ -95,6 +104,11 @@ def _process_item(
try:
task_output_ = task(item_content)
except Exception as exception:
if exception_analyzer.is_llm_provider_rate_limit_error(exception):
LOGGER.error(
logging_messages.LLM_PROVIDER_RATE_LIMIT_ERROR_DETECTED_IN_EVALUATE_FUNCTION
)

error_info = error_info_collector.collect(exception)
raise
LOGGER.debug("Task finished, output: %s", task_output_)
Expand All @@ -118,7 +132,6 @@ def _process_item(
test_case_=test_case_, scoring_metrics=scoring_metrics
)
return test_result_

finally:
trace_data = context_storage.pop_trace_data() # type: ignore

Expand Down
2 changes: 2 additions & 0 deletions sdks/python/src/opik/logging_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,5 @@
)

PARSE_API_KEY_TOO_MANY_PARTS = "Too many parts (%d) found in the Opik API key: %r"

LLM_PROVIDER_RATE_LIMIT_ERROR_DETECTED_IN_EVALUATE_FUNCTION = "LLM provider rate limit error detected. We recommend reducing the amount of parallel requests by setting `task_threads` evaluation parameter to a smaller number"

0 comments on commit ad165a6

Please sign in to comment.