diff --git a/.github/workflows/continuous-integration.yaml b/.github/workflows/continuous-integration.yaml
index 4dc1ae5ef..7692870d6 100644
--- a/.github/workflows/continuous-integration.yaml
+++ b/.github/workflows/continuous-integration.yaml
@@ -22,6 +22,7 @@ jobs:
        with:
          fetch-depth: 1
          ref: ${{ github.event.pull_request.head.ref }}
+         repository: ${{ github.event.pull_request.head.repo.full_name }}

      - name: "Setup java"
        uses: actions/setup-java@v4
diff --git a/.github/workflows/perform-release.yml b/.github/workflows/perform-release.yml
index 02595cf77..5de33da0c 100644
--- a/.github/workflows/perform-release.yml
+++ b/.github/workflows/perform-release.yml
@@ -88,9 +88,6 @@ jobs:
        with:
          distribution: "sapmachine"
          java-version: ${{ env.JAVA_VERSION }}
-         server-id: ossrh
-         server-username: MAVEN_CENTRAL_USER # env variable for username in deploy
-         server-password: MAVEN_CENTRAL_PASSWORD # env variable for token in deploy

      - name: "Download Release Asset"
        id: download-asset
@@ -113,8 +110,8 @@ jobs:

      - name: "Deploy"
        run: |
-         MVN_ARGS="${{ env.MVN_CLI_ARGS }} deploy -Drelease -s settings.xml"
-         mvn $MVN_ARGS
+         MVN_ARGS="${{ env.MVN_CLI_ARGS }} -Drelease -s settings.xml"
+         mvn deploy $MVN_ARGS
        env:
          MAVEN_GPG_PASSPHRASE: ${{ secrets.PGP_PASSPHRASE }}

diff --git a/README.md b/README.md
index 40936a915..5539cdef0 100644
--- a/README.md
+++ b/README.md
@@ -165,7 +165,7 @@ After restarting your application, you should see an "aicore" entry in the `VCAP

  - **Name**: `my-aicore`
  - **Type**: `HTTP`
- - **URL**: `[serviceurls.AI_API_URL]/v2` (append `/v2` to the URL)
+ - **URL**: `[serviceurls.AI_API_URL]`
  - **Proxy Type**: `Internet`
  - **Authentication**: `OAuth2ClientCredentials`
  - **Client ID**: `[clientid]`
diff --git a/docs/guides/ORCHESTRATION_CHAT_COMPLETION.md b/docs/guides/ORCHESTRATION_CHAT_COMPLETION.md
index d336a47f5..31adb89a5 100644
--- a/docs/guides/ORCHESTRATION_CHAT_COMPLETION.md
+++ b/docs/guides/ORCHESTRATION_CHAT_COMPLETION.md
@@ -91,8 +91,7 @@ var prompt = new OrchestrationPrompt("Hello world! Why is this phrase so famous?

var result = client.chatCompletion(prompt, config);

-String messageResult =
-    result.getOrchestrationResult().getChoices().get(0).getMessage().getContent();
+String messageResult = result.getContent();
```

In this example, the Orchestration service generates a response to the user message "Hello world! Why is this phrase so famous?".
diff --git a/orchestration/src/main/java/com/sap/ai/sdk/orchestration/JacksonMixins.java b/orchestration/src/main/java/com/sap/ai/sdk/orchestration/JacksonMixins.java
new file mode 100644
index 000000000..8e26bf3af
--- /dev/null
+++ b/orchestration/src/main/java/com/sap/ai/sdk/orchestration/JacksonMixins.java
@@ -0,0 +1,25 @@
+package com.sap.ai.sdk.orchestration;
+
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+import com.sap.ai.sdk.orchestration.client.model.LLMChoice;
+import com.sap.ai.sdk.orchestration.client.model.LLMModuleResultSynchronous;
+import lombok.AccessLevel;
+import lombok.NoArgsConstructor;
+
+@NoArgsConstructor(access = AccessLevel.PRIVATE)
+final class JacksonMixins {
+  /** Mixin to enforce a specific subtype to be deserialized always. */
+  @JsonTypeInfo(use = JsonTypeInfo.Id.NONE)
+  @JsonDeserialize(as = LLMModuleResultSynchronous.class)
+  interface LLMModuleResultMixIn {}
+
+  /** Mixin to enforce a specific subtype to be deserialized always. */
+  @JsonTypeInfo(use = JsonTypeInfo.Id.NONE)
+  @JsonDeserialize(as = LLMChoice.class)
+  interface ModuleResultsOutputUnmaskingInnerMixIn {}
+
+  /** Mixin to suppress @JsonTypeInfo for oneOf interfaces. */
+  @JsonTypeInfo(use = JsonTypeInfo.Id.NONE)
+  interface NoTypeInfoMixin {}
+}
diff --git a/orchestration/src/main/java/com/sap/ai/sdk/orchestration/LLMModuleResultMixIn.java b/orchestration/src/main/java/com/sap/ai/sdk/orchestration/LLMModuleResultMixIn.java
deleted file mode 100644
index 680e50290..000000000
--- a/orchestration/src/main/java/com/sap/ai/sdk/orchestration/LLMModuleResultMixIn.java
+++ /dev/null
@@ -1,10 +0,0 @@
-package com.sap.ai.sdk.orchestration;
-
-import com.fasterxml.jackson.annotation.JsonTypeInfo;
-import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
-import com.sap.ai.sdk.orchestration.client.model.LLMModuleResultSynchronous;
-
-/** Mixin to enforce a specific subtype to be deserialized always. */
-@JsonTypeInfo(use = JsonTypeInfo.Id.NONE)
-@JsonDeserialize(as = LLMModuleResultSynchronous.class)
-interface LLMModuleResultMixIn {}
diff --git a/orchestration/src/main/java/com/sap/ai/sdk/orchestration/NoTypeInfoMixin.java b/orchestration/src/main/java/com/sap/ai/sdk/orchestration/NoTypeInfoMixin.java
deleted file mode 100644
index b594c0090..000000000
--- a/orchestration/src/main/java/com/sap/ai/sdk/orchestration/NoTypeInfoMixin.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package com.sap.ai.sdk.orchestration;
-
-import com.fasterxml.jackson.annotation.JsonTypeInfo;
-
-/** Mixin to suppress @JsonTypeInfo for oneOf interfaces. */
-@JsonTypeInfo(use = JsonTypeInfo.Id.NONE)
-interface NoTypeInfoMixin {}
diff --git a/orchestration/src/main/java/com/sap/ai/sdk/orchestration/OrchestrationChatResponse.java b/orchestration/src/main/java/com/sap/ai/sdk/orchestration/OrchestrationChatResponse.java
new file mode 100644
index 000000000..7eb830e4a
--- /dev/null
+++ b/orchestration/src/main/java/com/sap/ai/sdk/orchestration/OrchestrationChatResponse.java
@@ -0,0 +1,41 @@
+package com.sap.ai.sdk.orchestration;
+
+import static lombok.AccessLevel.PACKAGE;
+
+import com.sap.ai.sdk.orchestration.client.model.CompletionPostResponse;
+import com.sap.ai.sdk.orchestration.client.model.LLMModuleResultSynchronous;
+import javax.annotation.Nonnull;
+import lombok.RequiredArgsConstructor;
+import lombok.Value;
+
+/** Orchestration chat completion output. */
+@Value
+@RequiredArgsConstructor(access = PACKAGE)
+public class OrchestrationChatResponse {
+  CompletionPostResponse originalResponse;
+
+  /**
+   * Get the message content from the output.
+   *
+   * <p>Note: If there are multiple choices only the first one is returned
+ *
+ * @return the message content or empty string.
+ * @throws OrchestrationClientException if the content filter filtered the output.
+ */
+ @Nonnull
+ public String getContent() throws OrchestrationClientException {
+ final var choices =
+ ((LLMModuleResultSynchronous) originalResponse.getOrchestrationResult()).getChoices();
+
+ if (choices.isEmpty()) {
+ return "";
+ }
+
+ final var choice = choices.get(0);
+
+ if ("content_filter".equals(choice.getFinishReason())) {
+ throw new OrchestrationClientException("Content filter filtered the output.");
+ }
+ return choice.getMessage().getContent();
+ }
+}
diff --git a/orchestration/src/main/java/com/sap/ai/sdk/orchestration/OrchestrationClient.java b/orchestration/src/main/java/com/sap/ai/sdk/orchestration/OrchestrationClient.java
index a2d14cbab..6053a8089 100644
--- a/orchestration/src/main/java/com/sap/ai/sdk/orchestration/OrchestrationClient.java
+++ b/orchestration/src/main/java/com/sap/ai/sdk/orchestration/OrchestrationClient.java
@@ -45,11 +45,13 @@ public class OrchestrationClient {
.visibility(PropertyAccessor.GETTER, JsonAutoDetect.Visibility.NONE)
.visibility(PropertyAccessor.SETTER, JsonAutoDetect.Visibility.NONE)
.serializationInclusion(JsonInclude.Include.NON_NULL)
- .mixIn(LLMModuleResult.class, LLMModuleResultMixIn.class)
- .mixIn(ModuleResultsOutputUnmaskingInner.class, NoTypeInfoMixin.class)
- .mixIn(FilterConfig.class, NoTypeInfoMixin.class)
- .mixIn(MaskingProviderConfig.class, NoTypeInfoMixin.class)
- .mixIn(TemplatingModuleConfig.class, NoTypeInfoMixin.class)
+ .mixIn(LLMModuleResult.class, JacksonMixins.LLMModuleResultMixIn.class)
+ .mixIn(
+ ModuleResultsOutputUnmaskingInner.class,
+ JacksonMixins.ModuleResultsOutputUnmaskingInnerMixIn.class)
+ .mixIn(FilterConfig.class, JacksonMixins.NoTypeInfoMixin.class)
+ .mixIn(MaskingProviderConfig.class, JacksonMixins.NoTypeInfoMixin.class)
+ .mixIn(TemplatingModuleConfig.class, JacksonMixins.NoTypeInfoMixin.class)
.build();
}
@@ -99,12 +101,12 @@ public static CompletionPostRequest toCompletionPostRequest(
* @throws OrchestrationClientException if the request fails.
*/
@Nonnull
- public CompletionPostResponse chatCompletion(
+ public OrchestrationChatResponse chatCompletion(
@Nonnull final OrchestrationPrompt prompt, @Nonnull final OrchestrationModuleConfig config)
throws OrchestrationClientException {
val request = toCompletionPostRequest(prompt, config);
- return executeRequest(request);
+ return new OrchestrationChatResponse(executeRequest(request));
}
/**
diff --git a/orchestration/src/test/java/com/sap/ai/sdk/orchestration/OrchestrationUnitTest.java b/orchestration/src/test/java/com/sap/ai/sdk/orchestration/OrchestrationUnitTest.java
index 2bd9b7997..2af45a429 100644
--- a/orchestration/src/test/java/com/sap/ai/sdk/orchestration/OrchestrationUnitTest.java
+++ b/orchestration/src/test/java/com/sap/ai/sdk/orchestration/OrchestrationUnitTest.java
@@ -121,8 +121,7 @@ void testCompletion() {
final var result = client.chatCompletion(prompt, config);
assertThat(result).isNotNull();
- var orchestrationResult = (LLMModuleResultSynchronous) result.getOrchestrationResult();
- assertThat(orchestrationResult.getChoices().get(0).getMessage().getContent()).isNotEmpty();
+ assertThat(result.getContent()).isNotEmpty();
}
@Test
@@ -141,11 +140,12 @@ void testTemplating() throws IOException {
final var result =
client.chatCompletion(new OrchestrationPrompt(inputParams, template), config);
- assertThat(result.getRequestId()).isEqualTo("26ea36b5-c196-4806-a9a6-a686f0c6ad91");
- assertThat(result.getModuleResults().getTemplating().get(0).getContent())
+ final var response = result.getOriginalResponse();
+ assertThat(response.getRequestId()).isEqualTo("26ea36b5-c196-4806-a9a6-a686f0c6ad91");
+ assertThat(response.getModuleResults().getTemplating().get(0).getContent())
.isEqualTo("Reply with 'Orchestration Service is working!' in German");
- assertThat(result.getModuleResults().getTemplating().get(0).getRole()).isEqualTo("user");
- var llm = (LLMModuleResultSynchronous) result.getModuleResults().getLlm();
+ assertThat(response.getModuleResults().getTemplating().get(0).getRole()).isEqualTo("user");
+ var llm = (LLMModuleResultSynchronous) response.getModuleResults().getLlm();
assertThat(llm.getId()).isEqualTo("chatcmpl-9lzPV4kLrXjFckOp2yY454wksWBoj");
assertThat(llm.getObject()).isEqualTo("chat.completion");
assertThat(llm.getCreated()).isEqualTo(1721224505);
@@ -160,7 +160,7 @@ void testTemplating() throws IOException {
assertThat(usage.getCompletionTokens()).isEqualTo(7);
assertThat(usage.getPromptTokens()).isEqualTo(19);
assertThat(usage.getTotalTokens()).isEqualTo(26);
- var orchestrationResult = (LLMModuleResultSynchronous) result.getOrchestrationResult();
+ var orchestrationResult = (LLMModuleResultSynchronous) response.getOrchestrationResult();
assertThat(orchestrationResult.getId()).isEqualTo("chatcmpl-9lzPV4kLrXjFckOp2yY454wksWBoj");
assertThat(orchestrationResult.getObject()).isEqualTo("chat.completion");
assertThat(orchestrationResult.getCreated()).isEqualTo(1721224505);
@@ -286,7 +286,8 @@ void messagesHistory() throws IOException {
final var result = client.chatCompletion(prompt, config);
- assertThat(result.getRequestId()).isEqualTo("26ea36b5-c196-4806-a9a6-a686f0c6ad91");
+ assertThat(result.getOriginalResponse().getRequestId())
+ .isEqualTo("26ea36b5-c196-4806-a9a6-a686f0c6ad91");
// verify that the history is sent correctly
try (var requestInputStream = fileLoader.apply("messagesHistoryRequest.json")) {
@@ -298,7 +299,7 @@ void messagesHistory() throws IOException {
}
@Test
- void maskingAnonymization() throws IOException {
+ void maskingPseudonymization() throws IOException {
stubFor(
post(urlPathEqualTo("/v2/inference/deployments/abcdef0123456789/completion"))
.willReturn(
@@ -307,18 +308,16 @@ void maskingAnonymization() throws IOException {
.withHeader("Content-Type", "application/json")));
final var maskingConfig =
- createMaskingConfig(DPIConfig.MethodEnum.ANONYMIZATION, DPIEntities.PHONE);
+ createMaskingConfig(DPIConfig.MethodEnum.PSEUDONYMIZATION, DPIEntities.PHONE);
final var result = client.chatCompletion(prompt, config.withMaskingConfig(maskingConfig));
+ final var response = result.getOriginalResponse();
- assertThat(result).isNotNull();
- GenericModuleResult inputMasking = result.getModuleResults().getInputMasking();
+ assertThat(response).isNotNull();
+ GenericModuleResult inputMasking = response.getModuleResults().getInputMasking();
assertThat(inputMasking.getMessage()).isEqualTo("Input to LLM is masked successfully.");
assertThat(inputMasking.getData()).isNotNull();
- final var choices = ((LLMModuleResultSynchronous) result.getOrchestrationResult()).getChoices();
- assertThat(choices.get(0).getMessage().getContent())
- .isEqualTo(
- "I'm sorry, I cannot provide information about specific individuals, including their nationality.");
+ assertThat(result.getContent()).contains("Hi Mallory");
// verify that the request is sent correctly
try (var requestInputStream = fileLoader.apply("maskingRequest.json")) {
@@ -414,4 +413,17 @@ void testErrorHandling() {
softly.assertAll();
}
+
+ @Test
+ void testEmptyChoicesResponse() {
+ stubFor(
+ post(urlPathEqualTo("/v2/inference/deployments/abcdef0123456789/completion"))
+ .willReturn(
+ aResponse()
+ .withBodyFile("emptyChoicesResponse.json")
+ .withHeader("Content-Type", "application/json")));
+ final var result = client.chatCompletion(prompt, config);
+
+ assertThat(result.getContent()).isEmpty();
+ }
}
diff --git a/orchestration/src/test/resources/__files/emptyChoicesResponse.json b/orchestration/src/test/resources/__files/emptyChoicesResponse.json
new file mode 100644
index 000000000..3d36bdcd2
--- /dev/null
+++ b/orchestration/src/test/resources/__files/emptyChoicesResponse.json
@@ -0,0 +1,35 @@
+{
+ "request_id": "26ea36b5-c196-4806-a9a6-a686f0c6ad91",
+ "module_results": {
+ "templating": [
+ {
+ "role": "user",
+ "content": "Reply with 'Orchestration Service is working!' in German"
+ }
+ ],
+ "llm": {
+ "id": "chatcmpl-9lzPV4kLrXjFckOp2yY454wksWBoj",
+ "object": "chat.completion",
+ "created": 1721224505,
+ "model": "gpt-35-turbo-16k",
+ "choices": [],
+ "usage": {
+ "completion_tokens": 7,
+ "prompt_tokens": 19,
+ "total_tokens": 26
+ }
+ }
+ },
+ "orchestration_result": {
+ "id": "chatcmpl-9lzPV4kLrXjFckOp2yY454wksWBoj",
+ "object": "chat.completion",
+ "created": 1721224505,
+ "model": "gpt-35-turbo-16k",
+ "choices": [],
+ "usage": {
+ "completion_tokens": 7,
+ "prompt_tokens": 19,
+ "total_tokens": 26
+ }
+ }
+}
diff --git a/orchestration/src/test/resources/__files/maskingResponse.json b/orchestration/src/test/resources/__files/maskingResponse.json
index 1f9993b6c..2631c5d09 100644
--- a/orchestration/src/test/resources/__files/maskingResponse.json
+++ b/orchestration/src/test/resources/__files/maskingResponse.json
@@ -1,74 +1,84 @@
{
- "request_id": "4c1b423d-f3a9-443e-9876-091888b0d585",
+ "request_id": "c252c73e-849f-4fa7-8b35-b16f3434a0da",
"module_results": {
- "grounding": null,
"templating": [
+ {
+ "role": "system",
+ "content": "Please write an initial response to the below user feedback, stating that we are working on the feedback and will get back to them soon.\nPlease make sure to address the user in person and end with \"Best regards, the AI SDK team\".\n"
+ },
{
"role": "user",
- "content": "What is the nationality of Patrick Morgan +49 (970) 333-3833"
+ "content": "Username: Mallory\nuserEmail: mallory@sap.com\nDate: 2022-01-01\n\nI think the SDK is good, but could use some further enhancements.\nMy architect Alice and manager Bob pointed out that we need the grounding capabilities, which aren't supported yet.\n"
}
],
"input_masking": {
"message": "Input to LLM is masked successfully.",
"data": {
"masked_template": [
+ {
+ "role": "system",
+ "content": "Please write an initial response to the below user feedback, stating that we are working on the feedback and will get back to them soon.\nPlease make sure to address the user in person and end with \"Best regards, the AI SDK team\".\n"
+ },
{
"role": "user",
- "content": "What is the nationality of Patrick Morgan MASKED_PHONE_NUMBER"
+ "content": "Username: MASKED_PERSON_2\nuserEmail: MASKED_PERSON_3MASKED_EMAIL_1\nDate: 2022-01-01\n\nI think the SDK is good, but could use some further enhancements.\nMy architect MASKED_PERSON_1 and manager MASKED_PERSON_4 pointed out that we need the grounding capabilities, which aren't supported yet.\n"
}
]
}
},
- "input_filtering": null,
"llm": {
+ "id": "chatcmpl-AUr7GVVoQbg52GRbADpkx4hQvDBiz",
"object": "chat.completion",
- "id": "chatcmpl-ADVDtCV54jp7BCKxeyWs3MWPFNx3N",
- "created": 1727781649,
+ "created": 1731917382,
"model": "gpt-35-turbo",
- "system_fingerprint": "fp_e49e4201a9",
+ "system_fingerprint": "fp_808245b034",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
- "content": "I'm sorry, I cannot provide information about specific individuals, including their nationality."
- },
- "logprobs": {
+ "content": "Hi MASKED_PERSON_2,\n\nThank you for your feedback on the SDK. We appreciate your insights and are currently working on further enhancements, including the grounding capabilities that your architect and manager have highlighted. We will take your suggestions into consideration and aim to address them in our future updates.\n\nBest regards, the AI SDK team"
},
"finish_reason": "stop"
}
],
"usage": {
- "completion_tokens": 16,
- "prompt_tokens": 18,
- "total_tokens": 34
+ "completion_tokens": 64,
+ "prompt_tokens": 134,
+ "total_tokens": 198
}
},
- "output_filtering": null,
- "output_unmasking": []
+ "output_unmasking": [
+ {
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": "Hi Mallory,\n\nThank you for your feedback on the SDK. We appreciate your insights and are currently working on further enhancements, including the grounding capabilities that your architect and manager have highlighted. We will take your suggestions into consideration and aim to address them in our future updates.\n\nBest regards, the AI SDK team"
+ },
+ "finish_reason": "stop"
+ }
+ ]
},
"orchestration_result": {
+ "id": "chatcmpl-AUr7GVVoQbg52GRbADpkx4hQvDBiz",
"object": "chat.completion",
- "id": "chatcmpl-ADVDtCV54jp7BCKxeyWs3MWPFNx3N",
- "created": 1727781649,
+ "created": 1731917382,
"model": "gpt-35-turbo",
- "system_fingerprint": "fp_e49e4201a9",
+ "system_fingerprint": "fp_808245b034",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
- "content": "I'm sorry, I cannot provide information about specific individuals, including their nationality."
- },
- "logprobs": {
+ "content": "Hi Mallory,\n\nThank you for your feedback on the SDK. We appreciate your insights and are currently working on further enhancements, including the grounding capabilities that your architect and manager have highlighted. We will take your suggestions into consideration and aim to address them in our future updates.\n\nBest regards, the AI SDK team"
},
"finish_reason": "stop"
}
],
"usage": {
- "completion_tokens": 16,
- "prompt_tokens": 18,
- "total_tokens": 34
+ "completion_tokens": 64,
+ "prompt_tokens": 134,
+ "total_tokens": 198
}
}
}
diff --git a/orchestration/src/test/resources/maskingRequest.json b/orchestration/src/test/resources/maskingRequest.json
index 67cd5c16f..03c19275e 100644
--- a/orchestration/src/test/resources/maskingRequest.json
+++ b/orchestration/src/test/resources/maskingRequest.json
@@ -23,7 +23,7 @@
"masking_providers": [
{
"type": "sap_data_privacy_integration",
- "method": "anonymization",
+ "method": "pseudonymization",
"entities": [
{
"type": "profile-phone"
diff --git a/pom.xml b/pom.xml
index 9f3bba667..56eed8797 100644
--- a/pom.xml
+++ b/pom.xml
@@ -63,7 +63,7 @@