Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix data modeling data type Enum dumping #2091

Merged
merged 4 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ Changes are grouped as follows
### Added
- Support for the `/simulators` and `/simulators/integration` API endpoints.

## [7.72.1] - 2025-01-14
### Fixed
- Data Modeling container type Enum now dumps correctly and no longer camelCases the user-defined values.

## [7.72.0] - 2025-01-14
### Added
- Document Summary and Document Question Answering endpoints from the AI API.
Expand Down
24 changes: 9 additions & 15 deletions cognite/client/_api/ai/tools/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from cognite.client._api_client import APIClient
from cognite.client.data_classes.ai import Answer, AnswerLanguage, Summary
from cognite.client.data_classes.data_modeling import NodeId
from cognite.client.exceptions import CogniteAPIError
from cognite.client.utils._identifier import IdentifierSequenceWithInstanceId


Expand All @@ -18,7 +17,7 @@ def summarize(
id: int | None = None,
external_id: str | None = None,
instance_id: NodeId | None = None,
) -> Summary | None:
) -> Summary:
"""`Summarize a document using a Large Language Model. <https://developer.cognite.com/api#tag/Document-AI/operation/document_questioning_api_v1_projects__projectName__ai_tools_documents_ask_post>`_

Note:
Expand All @@ -31,11 +30,11 @@ def summarize(
instance_id (NodeId | None): The instance ID of the document

Returns:
Summary | None: A summary of the document if the document exists else None.
Summary: A summary of the document.

Examples:

Summarize a single document with id:
Summarize a single document using ID:

>>> from cognite.client import CogniteClient
>>> client = CogniteClient()
Expand All @@ -49,13 +48,8 @@ def summarize(
... )
"""
ident = IdentifierSequenceWithInstanceId.load(id, external_id, instance_id).as_singleton()
try:
res = self._post(self._RESOURCE_PATH + "/summarize", json={"items": ident.as_dicts()})
return Summary._load(res.json()["items"][0])
except CogniteAPIError as e:
if e.code != 404:
raise
return None
res = self._post(self._RESOURCE_PATH + "/summarize", json={"items": ident.as_dicts()})
return Summary._load(res.json()["items"][0])
Comment on lines +51 to +52
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated changes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes #42190b2


def ask_question(
self,
Expand Down Expand Up @@ -94,7 +88,7 @@ def ask_question(
instance_id (NodeId | Sequence[NodeId] | None): The instance ID(s) of the document(s)
language (AnswerLanguage | Literal['Chinese', 'Dutch', 'English', 'French', 'German', 'Italian', 'Japanese', 'Korean', 'Latvian', 'Norwegian', 'Portuguese', 'Spanish', 'Swedish']): The desired language of the answer, defaults to English.
additional_context (str | None): Additional context that you want the LLM to take into account.
ignore_unknown_ids (bool): Whether to skip documents that do not exists or that are not fully processed, instead of throwing an error
ignore_unknown_ids (bool): Whether to skip documents that do not exist or that are not fully processed, instead of throwing an error. If no valid documents are found, an error will always be raised.

Returns:
Answer: The answer to the question in the form of a list of multiple content objects, each consisting of a chunk of text along with a set of references.
Expand All @@ -105,9 +99,9 @@ def ask_question(

>>> from cognite.client import CogniteClient
>>> client = CogniteClient()
>>> client.ai.tools.documents.ask(
>>> client.ai.tools.documents.ask_question(
... question="What model pump was used?",
... ids=123,
... id=123,
... )

Ask a question about multiple documents referenced using external IDs, and instance ID
Expand All @@ -117,7 +111,7 @@ def ask_question(
>>> from cognite.client.data_classes.ai import AnswerLanguage
>>> client.ai.tools.documents.ask_question(
... question="What other pumps are available?",
... external_ids=["foo", "bar"],
... external_id=["foo", "bar"],
... instance_id=NodeId("my-space", "my-xid"),
... language=AnswerLanguage.German,
... )
Expand Down
1 change: 1 addition & 0 deletions cognite/client/_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1336,6 +1336,7 @@ def _raise_api_error(self, res: Response, payload: dict) -> NoReturn:
f"REDIRECT AFTER HTTP Error {res_hist.status_code} {res_hist.request.method} {res_hist.request.url}: {res_hist.content.decode()}"
)
logger.debug(f"HTTP Error {code} {res.request.method} {res.request.url}: {msg}", extra=error_details)
# TODO: We should throw "CogniteNotFoundError" if missing is populated and CogniteDuplicatedError when duplicated...
raise CogniteAPIError(
msg,
code,
Expand Down
2 changes: 1 addition & 1 deletion cognite/client/_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import annotations

__version__ = "7.72.0"
__version__ = "7.72.1"

__api_subversion__ = "20230101"
6 changes: 3 additions & 3 deletions cognite/client/data_classes/data_modeling/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
PropertyType,
)
from cognite.client.data_classes.data_modeling.ids import ContainerId
from cognite.client.utils._text import convert_all_keys_to_camel_case_recursive
from cognite.client.utils._text import convert_all_keys_to_camel_case_recursive, to_camel_case

if TYPE_CHECKING:
from cognite.client import CogniteClient
Expand Down Expand Up @@ -276,8 +276,8 @@ def dump(self, camel_case: bool = True) -> dict[str, str | dict]:
output["immutable"] = self.immutable
for key in ["nullable", "auto_increment", "name", "default_value", "description"]:
if (value := getattr(self, key)) is not None:
output[key] = value
return convert_all_keys_to_camel_case_recursive(output) if camel_case else output
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a dangerous function. Should we have a task to remove it? Or do it have any good use cases?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, I had the same thought!

output[to_camel_case(key) if camel_case else key] = value
return output


@dataclass(frozen=True)
Expand Down
9 changes: 9 additions & 0 deletions cognite/client/data_classes/data_modeling/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,3 +250,12 @@ class Enum(PropertyType):
_type = "enum"
values: dict[str, EnumValue]
unknown_value: str | None = None

def dump(self, camel_case: bool = True) -> dict[str, Any]:
    """Dump this enum property type to a dictionary.

    Args:
        camel_case (bool): Whether to camelCase the SDK's own keys (e.g. ``unknownValue``).
            The user-defined enum value keys are always passed through verbatim.

    Returns:
        dict[str, Any]: The dumped representation of this enum type.
    """
    dumped: dict[str, Any] = {"type": self._type}
    # Dump each EnumValue, keeping the user-defined keys untouched.
    dumped["values"] = {key: value.dump(camel_case) for key, value in self.values.items()}
    if self.unknown_value is not None:
        dumped["unknownValue" if camel_case else "unknown_value"] = self.unknown_value
    return dumped
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool.poetry]
name = "cognite-sdk"

version = "7.72.0"
version = "7.72.1"

description = "Cognite Python SDK"
readme = "README.md"
Expand Down
9 changes: 4 additions & 5 deletions tests/tests_integration/test_api/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import time
import unittest
from datetime import datetime

import pytest

Expand Down Expand Up @@ -287,10 +286,10 @@ def workflow_scheduled_trigger(cognite_client: CogniteClient, workflow_setup_pre
metadata={"test": "integration_schedule"},
)
)
# have to sleep until workflow is triggered because it's the only way to properly test get_trigger_run_history
now = datetime.now()
seconds_til_next_minute = 60 - now.second + 30 # 30 seconds buffer, needed from experience...
time.sleep(seconds_til_next_minute)
# Have to sleep until workflow is triggered because it's the only way to properly test get_trigger_run_history
# ...and as of Jan 14, 2025, there's "an artificial delay of 90 sec to scheduled triggers to prevent a stampede
# effect on downstream services for overlapping triggers (up to two minutes, stable per trigger)".
time.sleep(90 + 60)

yield trigger
cognite_client.workflows.triggers.delete(trigger.external_id)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest

from cognite.client.data_classes._base import UnknownCogniteObject
from cognite.client.data_classes.data_modeling import data_types
from cognite.client.data_classes.data_modeling.containers import (
Constraint,
Container,
Expand Down Expand Up @@ -43,6 +44,18 @@ def test_load_dump__only_required(self, data: dict) -> None:
actual = ContainerProperty.load(data).dump(camel_case=True)
assert data == actual

def test_dump_no_longer_camelCases_everything_when_used(self) -> None:
    # Regression test: user-defined enum keys must survive a dump/load round-trip
    # verbatim, even with camel_case=True (keys must not be camelCased).
    enum_type = data_types.Enum(
        {
            "Closed_I_think": data_types.EnumValue("Valve_is_closed"),
            "Opened or not": data_types.EnumValue("Valve is opened"),
        }
    )
    prop = ContainerProperty(enum_type)
    assert ContainerProperty._load(prop.dump()) == prop
    dumped_keys = sorted(prop.dump(camel_case=True)["type"]["values"])  # type: ignore [index]
    assert dumped_keys == ["Closed_I_think", "Opened or not"]


class TestConstraint:
@pytest.mark.parametrize(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from cognite.client.data_classes._base import UnknownCogniteObject
from cognite.client.data_classes.data_modeling.data_types import (
DirectRelationReference,
Enum,
PropertyType,
PropertyTypeWithUnit,
Text,
Expand Down Expand Up @@ -66,7 +67,7 @@ def test_load_ignore_unknown_properties(self) -> None:
data.pop("unknownProperty")
assert data == actual

def test_load_dump_unkown_property(self) -> None:
def test_load_dump_unknown_property(self) -> None:
data = {"type": "unknowngibberish", "list": True, "unit": {"externalId": "known"}}
obj = PropertyType.load(data)
assert isinstance(obj, UnknownCogniteObject)
Expand All @@ -82,6 +83,29 @@ def test_load_text_without_collation(self) -> None:
data["collation"] = "ucs_basic"
assert data == actual

def test_dump_enum_no_camel_casing_of_user_values(self) -> None:
    # User-defined enum value keys must never be camelCased when dumping,
    # regardless of the camel_case flag.
    raw = {
        "type": "enum",
        "values": {
            "string_valWe ird_Case": {
                "name": "string_valWe ird_Case",
                "description": "Time series with string data points.",
            },
            "numeric_valWe ird_Case": {
                "name": "numeric_valWe ird_Case",
                "description": "Time series with double floating point data points.",
            },
        },
    }
    loaded = PropertyType.load(raw)
    assert isinstance(loaded, Enum)

    expected = ["numeric_valWe ird_Case", "string_valWe ird_Case"]
    assert sorted(loaded.values) == expected
    for use_camel in (False, True):
        assert sorted(loaded.dump(camel_case=use_camel)["values"]) == expected


class TestUnitSupport:
@pytest.mark.parametrize(
Expand Down
6 changes: 6 additions & 0 deletions tests/tests_unit/test_docstring_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from cognite.client import _cognite_client, config, credentials
from cognite.client._api import (
ai,
assets,
data_sets,
datapoints,
Expand Down Expand Up @@ -135,3 +136,8 @@ def test_hosted_extractors(self):
def test_postgres_gateway(self):
run_docstring_tests(postgres_gateway_users)
run_docstring_tests(postgres_gateway_tables)

def test_ai(self):
    # Run the runnable docstring examples in the AI API modules; per the review
    # discussion these modules had been missing from the docstring test suite.
    run_docstring_tests(ai)
    run_docstring_tests(ai.tools)
    run_docstring_tests(ai.tools.documents)
Comment on lines +140 to +143
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have they forgotten to do it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They did, and I missed it in review

Loading