Skip to content

Commit

Permalink
Opik 640 sdk improve ux in evaluate function when provider rate limit exceeded (#923)

Browse files Browse the repository at this point in the history

* Add rate limit message to metrics calculation step in evaluate flow

* Add exception_analyzer.is_llm_provider_rate_limit_error check to user-defined task object

* Fix lint errors

* Suppress debug info for litellm

* Fix lint errors

* Small refactor in exception_analyzer

* Fix lint errors
  • Loading branch information
alexkuzmik authored Dec 19, 2024
1 parent 6070d73 commit ad165a6
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 5 deletions.
15 changes: 15 additions & 0 deletions sdks/python/src/opik/evaluation/exception_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import openai
import litellm.exceptions


def is_llm_provider_rate_limit_error(exception: Exception) -> bool:
    """Return True if ``exception`` looks like an LLM provider rate-limit error.

    An exception qualifies when it is one of the known rate-limit exception
    classes raised by supported providers, or when it carries an HTTP
    ``status_code`` attribute equal to 429 (Too Many Requests).
    """
    known_rate_limit_errors = (
        openai.RateLimitError,
        litellm.exceptions.RateLimitError,
    )

    if isinstance(exception, known_rate_limit_errors):
        return True

    # Fall back to duck-typing: many HTTP client exceptions expose the
    # response status code directly on the exception object.
    return getattr(exception, "status_code", None) == 429
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from opik import semantic_version

LOGGER = logging.getLogger(__name__)
litellm.suppress_debug_info = True # to disable colorized prints with links to litellm whenever an LLM provider raises an error


class LiteLLMChatModel(base_model.OpikBaseModel):
Expand Down
23 changes: 18 additions & 5 deletions sdks/python/src/opik/evaluation/scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@

import tqdm

from opik import context_storage, exceptions, opik_context, track
from opik import context_storage, exceptions, opik_context, track, logging_messages
from opik.api_objects import opik_client, trace
from opik.api_objects.dataset import dataset, dataset_item
from opik.api_objects.experiment import experiment, experiment_item

from opik.decorator import error_info_collector
from opik.types import ErrorInfoDict
from . import test_case, test_result
from . import test_case, test_result, exception_analyzer
from .metrics import arguments_helpers, base_metric, score_result
from .types import LLMTask

Expand Down Expand Up @@ -42,17 +43,25 @@ def _score_test_case(
score_results.append(result)
except exceptions.ScoreMethodMissingArguments:
raise
except Exception as e:
except Exception as exception:
# This can be problematic if the metric returns a list of strings as we will not know the name of the metrics that have failed
LOGGER.error(
"Failed to compute metric %s. Score result will be marked as failed.",
metric.name,
exc_info=True,
)

if exception_analyzer.is_llm_provider_rate_limit_error(exception):
LOGGER.error(
logging_messages.LLM_PROVIDER_RATE_LIMIT_ERROR_DETECTED_IN_EVALUATE_FUNCTION
)

score_results.append(
score_result.ScoreResult(
name=metric.name, value=0.0, reason=str(e), scoring_failed=True
name=metric.name,
value=0.0,
reason=str(exception),
scoring_failed=True,
)
)

Expand Down Expand Up @@ -95,6 +104,11 @@ def _process_item(
try:
task_output_ = task(item_content)
except Exception as exception:
if exception_analyzer.is_llm_provider_rate_limit_error(exception):
LOGGER.error(
logging_messages.LLM_PROVIDER_RATE_LIMIT_ERROR_DETECTED_IN_EVALUATE_FUNCTION
)

error_info = error_info_collector.collect(exception)
raise
LOGGER.debug("Task finished, output: %s", task_output_)
Expand All @@ -118,7 +132,6 @@ def _process_item(
test_case_=test_case_, scoring_metrics=scoring_metrics
)
return test_result_

finally:
trace_data = context_storage.pop_trace_data() # type: ignore

Expand Down
2 changes: 2 additions & 0 deletions sdks/python/src/opik/logging_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,5 @@
)

PARSE_API_KEY_TOO_MANY_PARTS = "Too many parts (%d) found in the Opik API key: %r"

LLM_PROVIDER_RATE_LIMIT_ERROR_DETECTED_IN_EVALUATE_FUNCTION = "LLM provider rate limit error detected. We recommend reducing the amount of parallel requests by setting `task_threads` evaluation parameter to a smaller number"

0 comments on commit ad165a6

Please sign in to comment.