diff --git a/sdks/python/code_generation/fern/readme.md b/sdks/python/code_generation/fern/readme.md index 62dda37506..36e73d89c9 100644 --- a/sdks/python/code_generation/fern/readme.md +++ b/sdks/python/code_generation/fern/readme.md @@ -5,6 +5,6 @@ How to generate new client code for communication with Opik backend 1. Execute the ./build_and_run.sh script from the root of repository 2. Go to http://localhost:3003/ (URL for backend API specification) 3. Download openapi specification file - `openapi.yaml` -4. Put this file into `code_generation/fern/openapi.yaml` +4. Put this file into `code_generation/fern/openapi/openapi.yaml` 5. Run `fern generate` from inside `code_generation/fern` folder. This will generate a python code inside the directory called `sdks` near the `fern` one. 7. Replace content of `src/opik/rest_api` with the python package inside `sdks` (there will be few nested directories, navigate until you find python files) diff --git a/sdks/python/src/opik/api_objects/opik_client.py b/sdks/python/src/opik/api_objects/opik_client.py index c794490677..df62e5db2c 100644 --- a/sdks/python/src/opik/api_objects/opik_client.py +++ b/sdks/python/src/opik/api_objects/opik_client.py @@ -3,9 +3,8 @@ import datetime import logging -from typing import Optional, Any, Dict, List +from typing import Optional, Any, Dict, List, Mapping from ..types import SpanType, UsageDict, FeedbackScoreDict - from . import ( span, trace, @@ -15,7 +14,7 @@ constants, validation_helpers, ) -from ..message_processing import streamer_constructors, messages +from ..message_processing import streamer_constructors, messages, jsonable_encoder from ..rest_api import client as rest_api_client from ..rest_api.types import dataset_public, trace_public, span_public from .. 
import datetime_helpers, config, httpx_client @@ -358,10 +357,27 @@ def create_dataset( return result - def create_experiment(self, name: str, dataset_name: str) -> experiment.Experiment: + def create_experiment( + self, + name: str, + dataset_name: str, + experiment_config: Optional[Dict[str, Any]] = None, + ) -> experiment.Experiment: id = helpers.generate_id() + + if experiment_config is None or isinstance(experiment_config, Mapping): + metadata = jsonable_encoder.jsonable_encoder(experiment_config) + else: + LOGGER.error( + "Experiment config must be dictionary, but %s was provided. Config will not be logged.", experiment_config + ) + metadata = None + self._rest_client.experiments.create_experiment( - name=name, dataset_name=dataset_name, id=id + name=name, + dataset_name=dataset_name, + id=id, + metadata=metadata, ) experiment_ = experiment.Experiment( diff --git a/sdks/python/src/opik/evaluation/evaluator.py b/sdks/python/src/opik/evaluation/evaluator.py index 94c1e6dfff..2c4636a955 100644 --- a/sdks/python/src/opik/evaluation/evaluator.py +++ b/sdks/python/src/opik/evaluation/evaluator.py @@ -1,5 +1,5 @@ import time -from typing import List +from typing import List, Dict, Any, Optional from .types import LLMTask from .metrics import base_metric @@ -15,6 +15,7 @@ def evaluate( task: LLMTask, scoring_metrics: List[base_metric.BaseMetric], experiment_name: str, + experiment_config: Optional[Dict[str, Any]] = None, verbose: int = 1, task_threads: int = 16, ) -> evaluation_result.EvaluationResult: @@ -27,12 +28,19 @@ task: A callable object that takes DatasetItem as input and returns dictionary which will later be used for scoring + experiment_name: The name of the experiment associated with evaluation run + + experiment_config: The dictionary with parameters that describe experiment + scoring_metrics: List of metrics to calculate during evaluation. 
Each metric has `score(...)` method, arguments for this method are taken from the `task` output, check the signature of the `score` method in metrics that you need to find out which keys are mandatory in `task`-returned dictionary. + verbose: an integer value that controls evaluation output logs such as summary and tqdm progress bar. + 0 - no outputs, 1 - outputs are enabled (default). + task_threads: amount of thread workers to run tasks. If set to 1, no additional threads are created, all tasks executed in the current thread sequentially. are executed sequentially in the current thread. @@ -58,7 +66,9 @@ def evaluate( scores_logger.log_scores(client=client, test_results=test_results) experiment = client.create_experiment( - name=experiment_name, dataset_name=dataset.name + name=experiment_name, + dataset_name=dataset.name, + experiment_config=experiment_config, ) experiment_items = [ experiment_item.ExperimentItem( diff --git a/sdks/python/src/opik/rest_api/__init__.py b/sdks/python/src/opik/rest_api/__init__.py index 5966eec94d..bbb10002dc 100644 --- a/sdks/python/src/opik/rest_api/__init__.py +++ b/sdks/python/src/opik/rest_api/__init__.py @@ -9,6 +9,8 @@ CategoricalFeedbackDetailCreate, CategoricalFeedbackDetailPublic, CategoricalFeedbackDetailUpdate, + ChunkedOutputJsonNode, + ChunkedOutputJsonNodeType, Dataset, DatasetItem, DatasetItemBatch, @@ -75,13 +77,18 @@ ProjectPagePublic, ProjectPublic, Span, + SpanBatch, SpanPagePublic, SpanPublic, SpanPublicType, SpanType, + SpanWrite, + SpanWriteType, Trace, + TraceBatch, TracePagePublic, TracePublic, + TraceWrite, ) from .errors import ( BadRequestError, @@ -93,7 +100,7 @@ from . 
import datasets, experiments, feedback_definitions, projects, spans, traces from .environment import OpikApiEnvironment from .feedback_definitions import FindFeedbackDefinitionsRequestType -from .spans import GetSpansByProjectRequestType, SpanWriteType +from .spans import GetSpansByProjectRequestType __all__ = [ "BadRequestError", @@ -105,6 +112,8 @@ "CategoricalFeedbackDetailCreate", "CategoricalFeedbackDetailPublic", "CategoricalFeedbackDetailUpdate", + "ChunkedOutputJsonNode", + "ChunkedOutputJsonNodeType", "ConflictError", "Dataset", "DatasetItem", @@ -177,14 +186,18 @@ "ProjectPagePublic", "ProjectPublic", "Span", + "SpanBatch", "SpanPagePublic", "SpanPublic", "SpanPublicType", "SpanType", + "SpanWrite", "SpanWriteType", "Trace", + "TraceBatch", "TracePagePublic", "TracePublic", + "TraceWrite", "UnprocessableEntityError", "datasets", "experiments", diff --git a/sdks/python/src/opik/rest_api/client.py b/sdks/python/src/opik/rest_api/client.py index b7ded7a09b..c371dbcd86 100644 --- a/sdks/python/src/opik/rest_api/client.py +++ b/sdks/python/src/opik/rest_api/client.py @@ -1,10 +1,14 @@ # This file was auto-generated by Fern from our API Definition. 
import typing +from json.decoder import JSONDecodeError import httpx +from .core.api_error import ApiError from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper +from .core.pydantic_utilities import pydantic_v1 +from .core.request_options import RequestOptions from .datasets.client import AsyncDatasetsClient, DatasetsClient from .environment import OpikApiEnvironment from .experiments.client import AsyncExperimentsClient, ExperimentsClient @@ -83,6 +87,38 @@ def __init__( self.spans = SpansClient(client_wrapper=self._client_wrapper) self.traces = TracesClient(client_wrapper=self._client_wrapper) + def is_alive( + self, *, request_options: typing.Optional[RequestOptions] = None + ) -> typing.Any: + """ + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + typing.Any + default response + + Examples + -------- + from Opik.client import OpikApi + + client = OpikApi() + client.is_alive() + """ + _response = self._client_wrapper.httpx_client.request( + "is-alive/ping", method="GET", request_options=request_options + ) + try: + if 200 <= _response.status_code < 300: + return pydantic_v1.parse_obj_as(typing.Any, _response.json()) # type: ignore + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + class AsyncOpikApi: """ @@ -150,6 +186,46 @@ def __init__( self.spans = AsyncSpansClient(client_wrapper=self._client_wrapper) self.traces = AsyncTracesClient(client_wrapper=self._client_wrapper) + async def is_alive( + self, *, request_options: typing.Optional[RequestOptions] = None + ) -> typing.Any: + """ + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + typing.Any + default response + + Examples + -------- + import asyncio + + from Opik.client import AsyncOpikApi + + client = AsyncOpikApi() + + + async def main() -> None: + await client.is_alive() + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "is-alive/ping", method="GET", request_options=request_options + ) + try: + if 200 <= _response.status_code < 300: + return pydantic_v1.parse_obj_as(typing.Any, _response.json()) # type: ignore + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + def _get_base_url( *, base_url: typing.Optional[str] = None, environment: OpikApiEnvironment diff --git a/sdks/python/src/opik/rest_api/experiments/client.py b/sdks/python/src/opik/rest_api/experiments/client.py index ac76ef8967..2414e45592 100644 --- a/sdks/python/src/opik/rest_api/experiments/client.py +++ b/sdks/python/src/opik/rest_api/experiments/client.py @@ -13,6 +13,7 @@ from ..types.experiment_item_public import ExperimentItemPublic from ..types.experiment_page_public import ExperimentPagePublic from ..types.experiment_public import ExperimentPublic +from ..types.json_node_write import JsonNodeWrite # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) @@ -79,6 +80,7 @@ def create_experiment( dataset_name: str, name: str, id: typing.Optional[str] = OMIT, + metadata: typing.Optional[JsonNodeWrite] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> None: """ @@ -92,6 +94,8 @@ def create_experiment( id : typing.Optional[str] + metadata : typing.Optional[JsonNodeWrite] + request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -112,7 +116,12 @@ def create_experiment( _response = self._client_wrapper.httpx_client.request( "v1/private/experiments", method="POST", - json={"id": id, "dataset_name": dataset_name, "name": name}, + json={ + "id": id, + "dataset_name": dataset_name, + "name": name, + "metadata": metadata, + }, request_options=request_options, omit=OMIT, ) @@ -377,6 +386,7 @@ async def create_experiment( dataset_name: str, name: str, id: typing.Optional[str] = OMIT, + metadata: typing.Optional[JsonNodeWrite] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> None: """ @@ -390,6 +400,8 @@ async def create_experiment( id : typing.Optional[str] + metadata : typing.Optional[JsonNodeWrite] + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -418,7 +430,12 @@ async def main() -> None: _response = await self._client_wrapper.httpx_client.request( "v1/private/experiments", method="POST", - json={"id": id, "dataset_name": dataset_name, "name": name}, + json={ + "id": id, + "dataset_name": dataset_name, + "name": name, + "metadata": metadata, + }, request_options=request_options, omit=OMIT, ) diff --git a/sdks/python/src/opik/rest_api/spans/__init__.py b/sdks/python/src/opik/rest_api/spans/__init__.py index 15a5d83b1f..4abe783760 100644 --- a/sdks/python/src/opik/rest_api/spans/__init__.py +++ b/sdks/python/src/opik/rest_api/spans/__init__.py @@ -1,5 +1,5 @@ # This file was auto-generated by Fern from our API Definition. 
-from .types import GetSpansByProjectRequestType, SpanWriteType +from .types import GetSpansByProjectRequestType -__all__ = ["GetSpansByProjectRequestType", "SpanWriteType"] +__all__ = ["GetSpansByProjectRequestType"] diff --git a/sdks/python/src/opik/rest_api/spans/client.py b/sdks/python/src/opik/rest_api/spans/client.py index c96200fe23..f0ff339bfa 100644 --- a/sdks/python/src/opik/rest_api/spans/client.py +++ b/sdks/python/src/opik/rest_api/spans/client.py @@ -17,8 +17,9 @@ from ..types.json_node_write import JsonNodeWrite from ..types.span_page_public import SpanPagePublic from ..types.span_public import SpanPublic +from ..types.span_write import SpanWrite +from ..types.span_write_type import SpanWriteType from .types.get_spans_by_project_request_type import GetSpansByProjectRequestType -from .types.span_write_type import SpanWriteType # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) @@ -282,6 +283,62 @@ def create_span( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) + def create_spans( + self, + *, + spans: typing.Sequence[SpanWrite], + request_options: typing.Optional[RequestOptions] = None, + ) -> None: + """ + Create spans + + Parameters + ---------- + spans : typing.Sequence[SpanWrite] + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + None + + Examples + -------- + import datetime + + from Opik import SpanWrite + from Opik.client import OpikApi + + client = OpikApi() + client.spans.create_spans( + spans=[ + SpanWrite( + trace_id="trace_id", + name="name", + type="general", + start_time=datetime.datetime.fromisoformat( + "2024-01-15 09:30:00+00:00", + ), + ) + ], + ) + """ + _response = self._client_wrapper.httpx_client.request( + "v1/private/spans/batch", + method="POST", + json={"spans": spans}, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + def get_span_by_id( self, id: str, *, request_options: typing.Optional[RequestOptions] = None ) -> SpanPublic: @@ -375,6 +432,7 @@ def update_span( *, trace_id: str, project_name: typing.Optional[str] = OMIT, + project_id: typing.Optional[str] = OMIT, parent_span_id: typing.Optional[str] = OMIT, end_time: typing.Optional[dt.datetime] = OMIT, input: typing.Optional[JsonNode] = OMIT, @@ -394,7 +452,10 @@ def update_span( trace_id : str project_name : typing.Optional[str] - If null, the default project is used + If null and project_id not specified, Default Project is assumed + + project_id : typing.Optional[str] + If null and project_name not specified, Default Project is assumed parent_span_id : typing.Optional[str] @@ -432,6 +493,7 @@ def update_span( method="PATCH", json={ "project_name": project_name, + "project_id": project_id, "trace_id": trace_id, "parent_span_id": parent_span_id, "end_time": end_time, @@ -838,6 +900,69 @@ async def main() -> None: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) + async def create_spans( + self, + *, + spans: typing.Sequence[SpanWrite], 
+ request_options: typing.Optional[RequestOptions] = None, + ) -> None: + """ + Create spans + + Parameters + ---------- + spans : typing.Sequence[SpanWrite] + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + None + + Examples + -------- + import asyncio + import datetime + + from Opik import SpanWrite + from Opik.client import AsyncOpikApi + + client = AsyncOpikApi() + + + async def main() -> None: + await client.spans.create_spans( + spans=[ + SpanWrite( + trace_id="trace_id", + name="name", + type="general", + start_time=datetime.datetime.fromisoformat( + "2024-01-15 09:30:00+00:00", + ), + ) + ], + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/private/spans/batch", + method="POST", + json={"spans": spans}, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + async def get_span_by_id( self, id: str, *, request_options: typing.Optional[RequestOptions] = None ) -> SpanPublic: @@ -947,6 +1072,7 @@ async def update_span( *, trace_id: str, project_name: typing.Optional[str] = OMIT, + project_id: typing.Optional[str] = OMIT, parent_span_id: typing.Optional[str] = OMIT, end_time: typing.Optional[dt.datetime] = OMIT, input: typing.Optional[JsonNode] = OMIT, @@ -966,7 +1092,10 @@ async def update_span( trace_id : str project_name : typing.Optional[str] - If null, the default project is used + If null and project_id not specified, Default Project is assumed + + project_id : typing.Optional[str] + If null and project_name not specified, Default Project is assumed parent_span_id : typing.Optional[str] @@ -1012,6 +1141,7 @@ async def main() -> None: method="PATCH", json={ "project_name": 
project_name, + "project_id": project_id, "trace_id": trace_id, "parent_span_id": parent_span_id, "end_time": end_time, diff --git a/sdks/python/src/opik/rest_api/spans/types/__init__.py b/sdks/python/src/opik/rest_api/spans/types/__init__.py index bb2425f8bc..33fe1d6bb6 100644 --- a/sdks/python/src/opik/rest_api/spans/types/__init__.py +++ b/sdks/python/src/opik/rest_api/spans/types/__init__.py @@ -1,6 +1,5 @@ # This file was auto-generated by Fern from our API Definition. from .get_spans_by_project_request_type import GetSpansByProjectRequestType -from .span_write_type import SpanWriteType -__all__ = ["GetSpansByProjectRequestType", "SpanWriteType"] +__all__ = ["GetSpansByProjectRequestType"] diff --git a/sdks/python/src/opik/rest_api/traces/client.py b/sdks/python/src/opik/rest_api/traces/client.py index 82e057a429..70bc18144c 100644 --- a/sdks/python/src/opik/rest_api/traces/client.py +++ b/sdks/python/src/opik/rest_api/traces/client.py @@ -15,6 +15,7 @@ from ..types.json_node_write import JsonNodeWrite from ..types.trace_page_public import TracePagePublic from ..types.trace_public import TracePublic +from ..types.trace_write import TraceWrite # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) @@ -252,6 +253,60 @@ def create_trace( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) + def create_traces( + self, + *, + traces: typing.Sequence[TraceWrite], + request_options: typing.Optional[RequestOptions] = None, + ) -> None: + """ + Create traces + + Parameters + ---------- + traces : typing.Sequence[TraceWrite] + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + None + + Examples + -------- + import datetime + + from Opik import TraceWrite + from Opik.client import OpikApi + + client = OpikApi() + client.traces.create_traces( + traces=[ + TraceWrite( + name="name", + start_time=datetime.datetime.fromisoformat( + "2024-01-15 09:30:00+00:00", + ), + ) + ], + ) + """ + _response = self._client_wrapper.httpx_client.request( + "v1/private/traces/batch", + method="POST", + json={"traces": traces}, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + def get_trace_by_id( self, id: str, *, request_options: typing.Optional[RequestOptions] = None ) -> TracePublic: @@ -336,6 +391,7 @@ def update_trace( id: str, *, project_name: typing.Optional[str] = OMIT, + project_id: typing.Optional[str] = OMIT, end_time: typing.Optional[dt.datetime] = OMIT, input: typing.Optional[JsonNode] = OMIT, output: typing.Optional[JsonNode] = OMIT, @@ -351,7 +407,10 @@ def update_trace( id : str project_name : typing.Optional[str] - If null, the default project is used + If null and project_id not specified, Default Project is assumed + + project_id : typing.Optional[str] + If null and project_name not specified, Default Project is assumed end_time : typing.Optional[dt.datetime] @@ -384,6 +443,7 @@ def update_trace( method="PATCH", json={ "project_name": project_name, + "project_id": project_id, "end_time": end_time, "input": input, "output": output, @@ -757,6 +817,67 @@ async def main() -> None: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) + async def create_traces( + self, + *, + traces: typing.Sequence[TraceWrite], + request_options: typing.Optional[RequestOptions] = None, 
+ ) -> None: + """ + Create traces + + Parameters + ---------- + traces : typing.Sequence[TraceWrite] + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + None + + Examples + -------- + import asyncio + import datetime + + from Opik import TraceWrite + from Opik.client import AsyncOpikApi + + client = AsyncOpikApi() + + + async def main() -> None: + await client.traces.create_traces( + traces=[ + TraceWrite( + name="name", + start_time=datetime.datetime.fromisoformat( + "2024-01-15 09:30:00+00:00", + ), + ) + ], + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/private/traces/batch", + method="POST", + json={"traces": traces}, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + async def get_trace_by_id( self, id: str, *, request_options: typing.Optional[RequestOptions] = None ) -> TracePublic: @@ -857,6 +978,7 @@ async def update_trace( id: str, *, project_name: typing.Optional[str] = OMIT, + project_id: typing.Optional[str] = OMIT, end_time: typing.Optional[dt.datetime] = OMIT, input: typing.Optional[JsonNode] = OMIT, output: typing.Optional[JsonNode] = OMIT, @@ -872,7 +994,10 @@ async def update_trace( id : str project_name : typing.Optional[str] - If null, the default project is used + If null and project_id not specified, Default Project is assumed + + project_id : typing.Optional[str] + If null and project_name not specified, Default Project is assumed end_time : typing.Optional[dt.datetime] @@ -913,6 +1038,7 @@ async def main() -> None: method="PATCH", json={ "project_name": project_name, + "project_id": project_id, "end_time": end_time, "input": input, "output": output, diff --git 
a/sdks/python/src/opik/rest_api/types/__init__.py b/sdks/python/src/opik/rest_api/types/__init__.py index 6bf166b0f9..3ca6fd5529 100644 --- a/sdks/python/src/opik/rest_api/types/__init__.py +++ b/sdks/python/src/opik/rest_api/types/__init__.py @@ -8,6 +8,8 @@ from .categorical_feedback_detail_create import CategoricalFeedbackDetailCreate from .categorical_feedback_detail_public import CategoricalFeedbackDetailPublic from .categorical_feedback_detail_update import CategoricalFeedbackDetailUpdate +from .chunked_output_json_node import ChunkedOutputJsonNode +from .chunked_output_json_node_type import ChunkedOutputJsonNodeType from .dataset import Dataset from .dataset_item import DatasetItem from .dataset_item_batch import DatasetItemBatch @@ -80,13 +82,18 @@ from .project_page_public import ProjectPagePublic from .project_public import ProjectPublic from .span import Span +from .span_batch import SpanBatch from .span_page_public import SpanPagePublic from .span_public import SpanPublic from .span_public_type import SpanPublicType from .span_type import SpanType +from .span_write import SpanWrite +from .span_write_type import SpanWriteType from .trace import Trace +from .trace_batch import TraceBatch from .trace_page_public import TracePagePublic from .trace_public import TracePublic +from .trace_write import TraceWrite __all__ = [ "CategoricalFeedbackDefinition", @@ -97,6 +104,8 @@ "CategoricalFeedbackDetailCreate", "CategoricalFeedbackDetailPublic", "CategoricalFeedbackDetailUpdate", + "ChunkedOutputJsonNode", + "ChunkedOutputJsonNodeType", "Dataset", "DatasetItem", "DatasetItemBatch", @@ -163,11 +172,16 @@ "ProjectPagePublic", "ProjectPublic", "Span", + "SpanBatch", "SpanPagePublic", "SpanPublic", "SpanPublicType", "SpanType", + "SpanWrite", + "SpanWriteType", "Trace", + "TraceBatch", "TracePagePublic", "TracePublic", + "TraceWrite", ] diff --git a/sdks/python/src/opik/rest_api/types/chunked_output_json_node.py 
b/sdks/python/src/opik/rest_api/types/chunked_output_json_node.py new file mode 100644 index 0000000000..0857b39b5b --- /dev/null +++ b/sdks/python/src/opik/rest_api/types/chunked_output_json_node.py @@ -0,0 +1,44 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 +from .chunked_output_json_node_type import ChunkedOutputJsonNodeType + + +class ChunkedOutputJsonNode(pydantic_v1.BaseModel): + type: typing.Optional[ChunkedOutputJsonNodeType] = None + closed: typing.Optional[bool] = None + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/sdks/python/src/opik/rest_api/types/chunked_output_json_node_type.py b/sdks/python/src/opik/rest_api/types/chunked_output_json_node_type.py new file mode 100644 index 0000000000..f4117572b0 --- /dev/null +++ b/sdks/python/src/opik/rest_api/types/chunked_output_json_node_type.py @@ -0,0 +1,44 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 + + +class ChunkedOutputJsonNodeType(pydantic_v1.BaseModel): + type_name: typing.Optional[str] = pydantic_v1.Field(alias="typeName", default=None) + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + allow_population_by_field_name = True + populate_by_name = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/sdks/python/src/opik/rest_api/types/experiment.py b/sdks/python/src/opik/rest_api/types/experiment.py index e6a8240dd9..8d1af9d446 100644 --- a/sdks/python/src/opik/rest_api/types/experiment.py +++ b/sdks/python/src/opik/rest_api/types/experiment.py @@ -6,6 +6,7 @@ from ..core.datetime_utils import serialize_datetime from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 from .feedback_score_average import FeedbackScoreAverage +from .json_node import JsonNode class Experiment(pydantic_v1.BaseModel): @@ -13,6 +14,7 @@ class Experiment(pydantic_v1.BaseModel): dataset_name: str dataset_id: typing.Optional[str] = None name: str + metadata: typing.Optional[JsonNode] = None feedback_scores: typing.Optional[typing.List[FeedbackScoreAverage]] = None trace_count: typing.Optional[int] = None 
created_at: typing.Optional[dt.datetime] = None diff --git a/sdks/python/src/opik/rest_api/types/experiment_public.py b/sdks/python/src/opik/rest_api/types/experiment_public.py index 852a86ba35..ccee386288 100644 --- a/sdks/python/src/opik/rest_api/types/experiment_public.py +++ b/sdks/python/src/opik/rest_api/types/experiment_public.py @@ -6,12 +6,15 @@ from ..core.datetime_utils import serialize_datetime from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 from .feedback_score_average_public import FeedbackScoreAveragePublic +from .json_node_public import JsonNodePublic class ExperimentPublic(pydantic_v1.BaseModel): id: typing.Optional[str] = None + dataset_name: str dataset_id: typing.Optional[str] = None name: str + metadata: typing.Optional[JsonNodePublic] = None feedback_scores: typing.Optional[typing.List[FeedbackScoreAveragePublic]] = None trace_count: typing.Optional[int] = None created_at: typing.Optional[dt.datetime] = None diff --git a/sdks/python/src/opik/rest_api/types/span_batch.py b/sdks/python/src/opik/rest_api/types/span_batch.py new file mode 100644 index 0000000000..3e4d287c28 --- /dev/null +++ b/sdks/python/src/opik/rest_api/types/span_batch.py @@ -0,0 +1,43 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 +from .span import Span + + +class SpanBatch(pydantic_v1.BaseModel): + spans: typing.List[Span] + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/sdks/python/src/opik/rest_api/types/span_write.py b/sdks/python/src/opik/rest_api/types/span_write.py new file mode 100644 index 0000000000..52746d463f --- /dev/null +++ b/sdks/python/src/opik/rest_api/types/span_write.py @@ -0,0 +1,60 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 +from .json_node_write import JsonNodeWrite +from .span_write_type import SpanWriteType + + +class SpanWrite(pydantic_v1.BaseModel): + id: typing.Optional[str] = None + project_name: typing.Optional[str] = pydantic_v1.Field(default=None) + """ + If null, the default project is used + """ + + trace_id: str + parent_span_id: typing.Optional[str] = None + name: str + type: SpanWriteType + start_time: dt.datetime + end_time: typing.Optional[dt.datetime] = None + input: typing.Optional[JsonNodeWrite] = None + output: typing.Optional[JsonNodeWrite] = None + metadata: typing.Optional[JsonNodeWrite] = None + tags: typing.Optional[typing.List[str]] = None + usage: typing.Optional[typing.Dict[str, int]] = None + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/sdks/python/src/opik/rest_api/spans/types/span_write_type.py b/sdks/python/src/opik/rest_api/types/span_write_type.py similarity index 100% rename from sdks/python/src/opik/rest_api/spans/types/span_write_type.py rename to sdks/python/src/opik/rest_api/types/span_write_type.py diff --git 
a/sdks/python/src/opik/rest_api/types/trace_batch.py b/sdks/python/src/opik/rest_api/types/trace_batch.py new file mode 100644 index 0000000000..51a78ff261 --- /dev/null +++ b/sdks/python/src/opik/rest_api/types/trace_batch.py @@ -0,0 +1,43 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 +from .trace import Trace + + +class TraceBatch(pydantic_v1.BaseModel): + traces: typing.List[Trace] + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/sdks/python/src/opik/rest_api/types/trace_write.py b/sdks/python/src/opik/rest_api/types/trace_write.py new file mode 100644 index 0000000000..3d06797884 --- /dev/null +++ b/sdks/python/src/opik/rest_api/types/trace_write.py @@ -0,0 +1,55 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 +from .json_node_write import JsonNodeWrite + + +class TraceWrite(pydantic_v1.BaseModel): + id: typing.Optional[str] = None + project_name: typing.Optional[str] = pydantic_v1.Field(default=None) + """ + If null, the default project is used + """ + + name: str + start_time: dt.datetime + end_time: typing.Optional[dt.datetime] = None + input: typing.Optional[JsonNodeWrite] = None + output: typing.Optional[JsonNodeWrite] = None + metadata: typing.Optional[JsonNodeWrite] = None + tags: typing.Optional[typing.List[str]] = None + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/sdks/python/tests/e2e/conftest.py b/sdks/python/tests/e2e/conftest.py index 37cc21d998..312b9a740c 100644 --- a/sdks/python/tests/e2e/conftest.py +++ b/sdks/python/tests/e2e/conftest.py @@ -32,12 +32,8 @@ def dataset_name(opik_client: opik.Opik): name = f"e2e-tests-dataset-{ _random_chars()}" yield name - opik_client.delete_dataset(name) - @pytest.fixture def experiment_name(opik_client: opik.Opik): name = f"e2e-tests-experiment-{ _random_chars()}" yield name - - # TODO: delete the experiment 
diff --git a/sdks/python/tests/e2e/test_experiment.py b/sdks/python/tests/e2e/test_experiment.py index cc4cdcbb1e..49a14523f9 100644 --- a/sdks/python/tests/e2e/test_experiment.py +++ b/sdks/python/tests/e2e/test_experiment.py @@ -47,6 +47,7 @@ def task(item: dataset_item.DatasetItem): task=task, scoring_metrics=[equals_metric], experiment_name=experiment_name, + experiment_config={"model_name": "gpt-3.5"}, ) opik.flush_tracker() @@ -55,6 +56,7 @@ def task(item: dataset_item.DatasetItem): opik_client=opik_client, id=evaluation_result.experiment_id, experiment_name=evaluation_result.experiment_name, + experiment_metadata={"model_name": "gpt-3.5"}, traces_amount=3, # one trace per dataset item feedback_scores_amount=1, # an average value of all Equals metric scores ) diff --git a/sdks/python/tests/e2e/verifiers.py b/sdks/python/tests/e2e/verifiers.py index ea9dae88f4..a5c8846f8b 100644 --- a/sdks/python/tests/e2e/verifiers.py +++ b/sdks/python/tests/e2e/verifiers.py @@ -178,6 +178,7 @@ def verify_experiment( opik_client: opik.Opik, id: str, experiment_name: str, + experiment_metadata: Optional[Dict[str, Any]], feedback_scores_amount: int, traces_amount: int, ): @@ -195,6 +196,10 @@ def verify_experiment( experiment_content = rest_client.experiments.get_experiment_by_id(id) + assert ( + experiment_content.metadata == experiment_metadata + ), f"{experiment_content.metadata} != {experiment_metadata}" + assert ( experiment_content.name == experiment_name ), f"{experiment_content.name} != {experiment_name}" diff --git a/sdks/python/tests/unit/evaluation/test_evaluate.py b/sdks/python/tests/unit/evaluation/test_evaluate.py index 3a4f52556f..7a18c20372 100644 --- a/sdks/python/tests/unit/evaluation/test_evaluate.py +++ b/sdks/python/tests/unit/evaluation/test_evaluate.py @@ -71,7 +71,9 @@ def say_task(dataset_item: dataset_item.DatasetItem): ) mock_create_experiment.assert_called_once_with( - dataset_name="the-dataset-name", name="the-experiment-name" + 
dataset_name="the-dataset-name", + name="the-experiment-name", + experiment_config=None, ) mock_experiment.insert.assert_called_once_with( experiment_items=[mock.ANY, mock.ANY]