Skip to content

Commit

Permalink
[NA] Add experiment config to evaluate (#240)
Browse files Browse the repository at this point in the history
* Update fern generated code

* Add experiment_config to evaluate

* Fix lint errors

* Update evaluate e2e test

* Disable datasets deletion after the e2e test

* Fix evaluation unit test

* Fix lint errors
  • Loading branch information
alexkuzmik authored Sep 15, 2024
1 parent 0634b27 commit 9a421da
Show file tree
Hide file tree
Showing 24 changed files with 725 additions and 25 deletions.
2 changes: 1 addition & 1 deletion sdks/python/code_generation/fern/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ How to generate new client code for communication with Opik backend
1. Execute the ./build_and_run.sh script from the root of repository
2. Go to http://localhost:3003/ (URL for backend API specification)
3. Download openapi specification file - `openapi.yaml`
4. Put this file into `code_generation/fern/openapi.yaml`
4. Put this file into `code_generation/fern/openapi/openapi.yaml`
5. Run `fern generate` from inside the `code_generation/fern` folder. This will generate Python code inside a directory called `sdks` next to the `fern` one.
6. Replace the content of `src/opik/rest_api` with the Python package inside `sdks` (there will be a few nested directories; navigate until you find the Python files)
26 changes: 21 additions & 5 deletions sdks/python/src/opik/api_objects/opik_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@
import datetime
import logging

from typing import Optional, Any, Dict, List
from typing import Optional, Any, Dict, List, Mapping
from ..types import SpanType, UsageDict, FeedbackScoreDict

from . import (
span,
trace,
Expand All @@ -15,7 +14,7 @@
constants,
validation_helpers,
)
from ..message_processing import streamer_constructors, messages
from ..message_processing import streamer_constructors, messages, jsonable_encoder
from ..rest_api import client as rest_api_client
from ..rest_api.types import dataset_public, trace_public, span_public
from .. import datetime_helpers, config, httpx_client
Expand Down Expand Up @@ -358,10 +357,27 @@ def create_dataset(

return result

def create_experiment(self, name: str, dataset_name: str) -> experiment.Experiment:
def create_experiment(
self,
name: str,
dataset_name: str,
experiment_config: Optional[Dict[str, Any]] = None,
) -> experiment.Experiment:
id = helpers.generate_id()

# Normalise the optional experiment config into JSON-serialisable metadata.
# Three cases are distinguished so that the default (no config) stays silent:
#   * None      -> nothing to log; this is the normal default, not an error
#   * Mapping   -> encode it for the REST payload
#   * otherwise -> report the offending value and skip logging the config
if experiment_config is None:
    metadata = None
elif isinstance(experiment_config, Mapping):
    metadata = jsonable_encoder.jsonable_encoder(experiment_config)
else:
    # The message has a %s placeholder, so the rejected value must be passed
    # as a lazy logging argument; otherwise a literal "%s" is emitted.
    LOGGER.error(
        "Experiment config must be dictionary, but %s was provided. Config will not be logged.",
        experiment_config,
    )
    metadata = None

self._rest_client.experiments.create_experiment(
name=name, dataset_name=dataset_name, id=id
name=name,
dataset_name=dataset_name,
id=id,
metadata=metadata,
)

experiment_ = experiment.Experiment(
Expand Down
14 changes: 12 additions & 2 deletions sdks/python/src/opik/evaluation/evaluator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import time
from typing import List
from typing import List, Dict, Any, Optional

from .types import LLMTask
from .metrics import base_metric
Expand All @@ -15,6 +15,7 @@ def evaluate(
task: LLMTask,
scoring_metrics: List[base_metric.BaseMetric],
experiment_name: str,
experiment_config: Optional[Dict[str, Any]] = None,
verbose: int = 1,
task_threads: int = 16,
) -> evaluation_result.EvaluationResult:
Expand All @@ -27,12 +28,19 @@ def evaluate(
task: A callable object that takes DatasetItem as input and returns
dictionary which will later be used for scoring
experiment_name: The name of the experiment associated with evaluation run
experiment_config: The dictionary with parameters that describe experiment
scoring_metrics: List of metrics to calculate during evaluation.
Each metric has `score(...)` method, arguments for this method
are taken from the `task` output, check the signature
of the `score` method in metrics that you need to find out which keys
are mandatory in `task`-returned dictionary.
verbose: an integer value that controls evaluation output logs such as summary and tqdm progress bar.
0 - no outputs, 1 - outputs are enabled (default).
task_threads: amount of thread workers to run tasks. If set to 1, no additional
threads are created, all tasks are executed sequentially in the current thread.
Expand All @@ -58,7 +66,9 @@ def evaluate(
scores_logger.log_scores(client=client, test_results=test_results)

experiment = client.create_experiment(
name=experiment_name, dataset_name=dataset.name
name=experiment_name,
dataset_name=dataset.name,
experiment_config=experiment_config,
)
experiment_items = [
experiment_item.ExperimentItem(
Expand Down
15 changes: 14 additions & 1 deletion sdks/python/src/opik/rest_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
CategoricalFeedbackDetailCreate,
CategoricalFeedbackDetailPublic,
CategoricalFeedbackDetailUpdate,
ChunkedOutputJsonNode,
ChunkedOutputJsonNodeType,
Dataset,
DatasetItem,
DatasetItemBatch,
Expand Down Expand Up @@ -75,13 +77,18 @@
ProjectPagePublic,
ProjectPublic,
Span,
SpanBatch,
SpanPagePublic,
SpanPublic,
SpanPublicType,
SpanType,
SpanWrite,
SpanWriteType,
Trace,
TraceBatch,
TracePagePublic,
TracePublic,
TraceWrite,
)
from .errors import (
BadRequestError,
Expand All @@ -93,7 +100,7 @@
from . import datasets, experiments, feedback_definitions, projects, spans, traces
from .environment import OpikApiEnvironment
from .feedback_definitions import FindFeedbackDefinitionsRequestType
from .spans import GetSpansByProjectRequestType, SpanWriteType
from .spans import GetSpansByProjectRequestType

__all__ = [
"BadRequestError",
Expand All @@ -105,6 +112,8 @@
"CategoricalFeedbackDetailCreate",
"CategoricalFeedbackDetailPublic",
"CategoricalFeedbackDetailUpdate",
"ChunkedOutputJsonNode",
"ChunkedOutputJsonNodeType",
"ConflictError",
"Dataset",
"DatasetItem",
Expand Down Expand Up @@ -177,14 +186,18 @@
"ProjectPagePublic",
"ProjectPublic",
"Span",
"SpanBatch",
"SpanPagePublic",
"SpanPublic",
"SpanPublicType",
"SpanType",
"SpanWrite",
"SpanWriteType",
"Trace",
"TraceBatch",
"TracePagePublic",
"TracePublic",
"TraceWrite",
"UnprocessableEntityError",
"datasets",
"experiments",
Expand Down
76 changes: 76 additions & 0 deletions sdks/python/src/opik/rest_api/client.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
# This file was auto-generated by Fern from our API Definition.

import typing
from json.decoder import JSONDecodeError

import httpx

from .core.api_error import ApiError
from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
from .core.pydantic_utilities import pydantic_v1
from .core.request_options import RequestOptions
from .datasets.client import AsyncDatasetsClient, DatasetsClient
from .environment import OpikApiEnvironment
from .experiments.client import AsyncExperimentsClient, ExperimentsClient
Expand Down Expand Up @@ -83,6 +87,38 @@ def __init__(
self.spans = SpansClient(client_wrapper=self._client_wrapper)
self.traces = TracesClient(client_wrapper=self._client_wrapper)

def is_alive(
    self, *, request_options: typing.Optional[RequestOptions] = None
) -> typing.Any:
    """
    Send a GET request to the ``is-alive/ping`` endpoint and return its decoded body.

    Parameters
    ----------
    request_options : typing.Optional[RequestOptions]
        Request-specific configuration.

    Returns
    -------
    typing.Any
        The JSON-decoded body of a 2xx response.

    Raises
    ------
    ApiError
        If the response body is not valid JSON (``body`` holds the raw text),
        or if the status code is outside the 2xx range (``body`` holds the
        decoded JSON).

    Examples
    --------
    from Opik.client import OpikApi

    client = OpikApi()
    client.is_alive()
    """
    response = self._client_wrapper.httpx_client.request(
        "is-alive/ping", method="GET", request_options=request_options
    )
    status = response.status_code

    # Both the success and the failure path need the decoded body, so decode
    # it once; an undecodable body is reported with the raw text instead.
    try:
        payload = response.json()
    except JSONDecodeError:
        raise ApiError(status_code=status, body=response.text)

    if not (200 <= status < 300):
        raise ApiError(status_code=status, body=payload)
    return pydantic_v1.parse_obj_as(typing.Any, payload)  # type: ignore


class AsyncOpikApi:
"""
Expand Down Expand Up @@ -150,6 +186,46 @@ def __init__(
self.spans = AsyncSpansClient(client_wrapper=self._client_wrapper)
self.traces = AsyncTracesClient(client_wrapper=self._client_wrapper)

async def is_alive(
    self, *, request_options: typing.Optional[RequestOptions] = None
) -> typing.Any:
    """
    Send a GET request to the ``is-alive/ping`` endpoint and return its decoded body.

    Parameters
    ----------
    request_options : typing.Optional[RequestOptions]
        Request-specific configuration.

    Returns
    -------
    typing.Any
        The JSON-decoded body of a 2xx response.

    Raises
    ------
    ApiError
        If the response body is not valid JSON (``body`` holds the raw text),
        or if the status code is outside the 2xx range (``body`` holds the
        decoded JSON).

    Examples
    --------
    import asyncio

    from Opik.client import AsyncOpikApi

    client = AsyncOpikApi()


    async def main() -> None:
        await client.is_alive()


    asyncio.run(main())
    """
    response = await self._client_wrapper.httpx_client.request(
        "is-alive/ping", method="GET", request_options=request_options
    )
    status = response.status_code

    # Both the success and the failure path need the decoded body, so decode
    # it once; an undecodable body is reported with the raw text instead.
    try:
        payload = response.json()
    except JSONDecodeError:
        raise ApiError(status_code=status, body=response.text)

    if not (200 <= status < 300):
        raise ApiError(status_code=status, body=payload)
    return pydantic_v1.parse_obj_as(typing.Any, payload)  # type: ignore


def _get_base_url(
*, base_url: typing.Optional[str] = None, environment: OpikApiEnvironment
Expand Down
21 changes: 19 additions & 2 deletions sdks/python/src/opik/rest_api/experiments/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from ..types.experiment_item_public import ExperimentItemPublic
from ..types.experiment_page_public import ExperimentPagePublic
from ..types.experiment_public import ExperimentPublic
from ..types.json_node_write import JsonNodeWrite

# this is used as the default value for optional parameters
OMIT = typing.cast(typing.Any, ...)
Expand Down Expand Up @@ -79,6 +80,7 @@ def create_experiment(
dataset_name: str,
name: str,
id: typing.Optional[str] = OMIT,
metadata: typing.Optional[JsonNodeWrite] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> None:
"""
Expand All @@ -92,6 +94,8 @@ def create_experiment(
id : typing.Optional[str]
metadata : typing.Optional[JsonNodeWrite]
request_options : typing.Optional[RequestOptions]
Request-specific configuration.
Expand All @@ -112,7 +116,12 @@ def create_experiment(
_response = self._client_wrapper.httpx_client.request(
"v1/private/experiments",
method="POST",
json={"id": id, "dataset_name": dataset_name, "name": name},
json={
"id": id,
"dataset_name": dataset_name,
"name": name,
"metadata": metadata,
},
request_options=request_options,
omit=OMIT,
)
Expand Down Expand Up @@ -377,6 +386,7 @@ async def create_experiment(
dataset_name: str,
name: str,
id: typing.Optional[str] = OMIT,
metadata: typing.Optional[JsonNodeWrite] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> None:
"""
Expand All @@ -390,6 +400,8 @@ async def create_experiment(
id : typing.Optional[str]
metadata : typing.Optional[JsonNodeWrite]
request_options : typing.Optional[RequestOptions]
Request-specific configuration.
Expand Down Expand Up @@ -418,7 +430,12 @@ async def main() -> None:
_response = await self._client_wrapper.httpx_client.request(
"v1/private/experiments",
method="POST",
json={"id": id, "dataset_name": dataset_name, "name": name},
json={
"id": id,
"dataset_name": dataset_name,
"name": name,
"metadata": metadata,
},
request_options=request_options,
omit=OMIT,
)
Expand Down
4 changes: 2 additions & 2 deletions sdks/python/src/opik/rest_api/spans/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This file was auto-generated by Fern from our API Definition.

from .types import GetSpansByProjectRequestType, SpanWriteType
from .types import GetSpansByProjectRequestType

__all__ = ["GetSpansByProjectRequestType", "SpanWriteType"]
__all__ = ["GetSpansByProjectRequestType"]
Loading

0 comments on commit 9a421da

Please sign in to comment.