-
Notifications
You must be signed in to change notification settings - Fork 238
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[OPIK-622] Remove deprecated dataset item fields #1003
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -105,28 +105,9 @@ static Publisher<DatasetItem> mapItem(Result results) { | |
|
||
Map<String, JsonNode> data = getData(row); | ||
|
||
JsonNode input = getJsonNode(row, data, "input"); | ||
JsonNode expectedOutput = getJsonNode(row, data, "expected_output"); | ||
JsonNode metadata = getJsonNode(row, data, "metadata"); | ||
Comment on lines
-108
to
-110
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
|
||
if (!data.containsKey("input")) { | ||
data.put("input", input); | ||
} | ||
|
||
if (!data.containsKey("expected_output")) { | ||
data.put("expected_output", expectedOutput); | ||
} | ||
|
||
if (!data.containsKey("metadata")) { | ||
data.put("metadata", metadata); | ||
} | ||
|
||
return DatasetItem.builder() | ||
.id(row.get("id", UUID.class)) | ||
.input(input) | ||
.data(data) | ||
.expectedOutput(expectedOutput) | ||
.metadata(metadata) | ||
.source(DatasetItemSource.fromString(row.get("source", String.class))) | ||
.traceId(Optional.ofNullable(row.get("trace_id", String.class)) | ||
.filter(s -> !s.isBlank()) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -99,7 +99,6 @@ | |
import java.util.Arrays; | ||
import java.util.Collection; | ||
import java.util.Comparator; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
@@ -2770,18 +2769,16 @@ public Stream<Arguments> invalidDatasetItems() { | |
return Stream.of( | ||
arguments(factory.manufacturePojo(DatasetItemBatch.class).toBuilder() | ||
.items(List.of(factory.manufacturePojo(DatasetItem.class).toBuilder() | ||
.input(null) | ||
.data(null) | ||
.build())) | ||
.build(), | ||
"items[0].input must provide either input or data field"), | ||
"items[0].data must provide data field"), | ||
arguments(factory.manufacturePojo(DatasetItemBatch.class).toBuilder() | ||
.items(List.of(factory.manufacturePojo(DatasetItem.class).toBuilder() | ||
.input(null) | ||
.data(Map.of()) | ||
.build())) | ||
.build(), | ||
"items[0].input must provide either input or data field"), | ||
"items[0].data must provide data field"), | ||
arguments(factory.manufacturePojo(DatasetItemBatch.class).toBuilder() | ||
.items(List.of(factory.manufacturePojo(DatasetItem.class).toBuilder() | ||
.source(null) | ||
|
@@ -2973,28 +2970,10 @@ void createDatasetItem__whenDatasetItemWorkspaceAndSpanWorkspaceDoesNotMatch__th | |
} | ||
} | ||
|
||
@Test | ||
@DisplayName("when data is null, the accept the request") | ||
void create__whenDataIsNull__thenAcceptTheRequest() { | ||
var item = factory.manufacturePojo(DatasetItem.class).toBuilder() | ||
.data(null) | ||
.build(); | ||
|
||
var batch = factory.manufacturePojo(DatasetItemBatch.class).toBuilder() | ||
.items(List.of(item)) | ||
.datasetId(null) | ||
.build(); | ||
|
||
putAndAssert(batch, TEST_WORKSPACE, API_KEY); | ||
|
||
getItemAndAssert(item, TEST_WORKSPACE, API_KEY); | ||
} | ||
|
||
@Test | ||
@DisplayName("when input is null but data is present, the accept the request") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please review the display name of the test and everything else. |
||
void create__whenInputIsNullButDataIsPresent__thenAcceptTheRequest() { | ||
var item = factory.manufacturePojo(DatasetItem.class).toBuilder() | ||
.input(null) | ||
.build(); | ||
|
||
var batch = factory.manufacturePojo(DatasetItemBatch.class).toBuilder() | ||
|
@@ -3160,7 +3139,6 @@ void streamDataItems__whenStreamingHasMaxSize__thenReturnItemsSortedByCreatedDat | |
var items = IntStream.range(0, 1000) | ||
.mapToObj(i -> factory.manufacturePojo(DatasetItem.class).toBuilder() | ||
.experimentItems(null) | ||
.metadata(null) | ||
.createdAt(null) | ||
.lastUpdatedAt(null) | ||
.build()) | ||
|
@@ -3232,8 +3210,6 @@ private void getItemAndAssert(DatasetItem expectedDatasetItem, String workspaceN | |
Map<String, JsonNode> data = Optional.ofNullable(expectedDatasetItem.data()) | ||
.orElse(Map.of()); | ||
|
||
expectedDatasetItem = mergeInputMap(expectedDatasetItem, data); | ||
|
||
assertThat(actualEntity.id()).isEqualTo(expectedDatasetItem.id()); | ||
assertThat(actualEntity).usingRecursiveComparison() | ||
.ignoringFields(IGNORED_FIELDS_DATA_ITEM) | ||
|
@@ -3243,37 +3219,6 @@ private void getItemAndAssert(DatasetItem expectedDatasetItem, String workspaceN | |
assertThat(actualEntity.lastUpdatedAt()).isInThePast(); | ||
} | ||
|
||
private DatasetItem mergeInputMap(DatasetItem expectedDatasetItem, Map<String, JsonNode> data) { | ||
|
||
Map<String, JsonNode> newMap = new HashMap<>(); | ||
|
||
if (expectedDatasetItem.expectedOutput() != null) { | ||
newMap.put("expected_output", expectedDatasetItem.expectedOutput()); | ||
} | ||
|
||
if (expectedDatasetItem.input() != null) { | ||
newMap.put("input", expectedDatasetItem.input()); | ||
} | ||
|
||
if (expectedDatasetItem.metadata() != null) { | ||
newMap.put("metadata", expectedDatasetItem.metadata()); | ||
} | ||
|
||
Map<String, JsonNode> mergedMap = Stream | ||
.concat(data.entrySet().stream(), newMap.entrySet().stream()) | ||
.collect(toMap( | ||
Map.Entry::getKey, | ||
Map.Entry::getValue, | ||
(v1, v2) -> v2 // In case of conflict, use the value from map2 | ||
)); | ||
|
||
expectedDatasetItem = expectedDatasetItem.toBuilder() | ||
.data(new HashMap<>(mergedMap)) | ||
.build(); | ||
|
||
return expectedDatasetItem; | ||
} | ||
|
||
@Nested | ||
@DisplayName("Delete items:") | ||
@TestInstance(TestInstance.Lifecycle.PER_CLASS) | ||
|
@@ -3474,7 +3419,10 @@ void getDatasetItemsByDatasetId__whenItemsWereUpdated__thenReturnCorrectItemsCou | |
|
||
var updatedItems = items | ||
.stream() | ||
.map(item -> item.toBuilder().input(factory.manufacturePojo(JsonNode.class)).build()) | ||
.map(item -> item.toBuilder() | ||
.data(Map.of(factory.manufacturePojo(String.class), | ||
factory.manufacturePojo(JsonNode.class))) | ||
.build()) | ||
.toList(); | ||
|
||
var updatedBatch = batch.toBuilder() | ||
|
@@ -3646,8 +3594,6 @@ private void assertPage(List<DatasetItem> expectedItems, List<DatasetItem> actua | |
Map<String, JsonNode> data = Optional.ofNullable(expectedDatasetItem.data()) | ||
.orElse(Map.of()); | ||
|
||
expectedDatasetItem = mergeInputMap(expectedDatasetItem, data); | ||
|
||
assertThat(actualDatasetItem.data()).isEqualTo(expectedDatasetItem.data()); | ||
} | ||
} | ||
|
@@ -4137,12 +4083,6 @@ Stream<Arguments> find__whenFilteringBySupportedFields__thenReturnMatchingRows() | |
FieldType.FEEDBACK_SCORES_NUMBER, Operator.EQUAL, "sql_cost", "10")), | ||
arguments(new ExperimentsComparisonFilter("feedback_scores", | ||
FieldType.FEEDBACK_SCORES_NUMBER, Operator.NOT_EQUAL, "sql_cost", "10")), | ||
arguments(new ExperimentsComparisonFilter("output", | ||
FieldType.STRING, Operator.CONTAINS, null, "sql_cost")), | ||
arguments(new ExperimentsComparisonFilter("expected_output", | ||
FieldType.DICTIONARY, Operator.CONTAINS, "output", "sql_cost")), | ||
arguments(new ExperimentsComparisonFilter("metadata", | ||
FieldType.DICTIONARY, Operator.EQUAL, "sql_cost", "10")), | ||
Comment on lines
-4140
to
-4145
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I believe you shouldn't remove these tests, as it's still legitimate to filter on those fields, but within |
||
arguments(new ExperimentsComparisonFilter("meta_field", | ||
FieldType.DICTIONARY, Operator.CONTAINS, "version[*]", "10")), | ||
arguments(new ExperimentsComparisonFilter("releases", | ||
|
@@ -4197,8 +4137,6 @@ private void createTraces(List<DatasetItem> items, String projectName, String wo | |
var item = items.get(i); | ||
var trace = Trace.builder() | ||
.id(GENERATOR.generate()) | ||
.input(item.input()) | ||
.output(item.expectedOutput()) | ||
Comment on lines
-4200
to
-4201
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Related: legitimate payload in the trace, but should come from |
||
.projectName(projectName) | ||
.startTime(Instant.now()) | ||
.name("trace-" + i) | ||
|
@@ -4214,12 +4152,6 @@ private void createDatasetItems(List<DatasetItem> items) { | |
if (i == 0) { | ||
DatasetItem item = factory.manufacturePojo(DatasetItem.class) | ||
.toBuilder() | ||
.input(JsonUtils | ||
.getJsonNodeFromString(JsonUtils.writeValueAsString(Map.of("input", "sql_cost")))) | ||
.expectedOutput(JsonUtils | ||
.getJsonNodeFromString(JsonUtils.writeValueAsString(Map.of("output", "sql_cost")))) | ||
.metadata(JsonUtils | ||
.getJsonNodeFromString(JsonUtils.writeValueAsString(Map.of("sql_cost", 10)))) | ||
Comment on lines
-4217
to
-4222
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems legitimate, but within |
||
.source(DatasetItemSource.SDK) | ||
.data(Map.of( | ||
"sql_tag", JsonUtils.readTree("sql_test"), | ||
|
@@ -4748,18 +4680,12 @@ void setUp() { | |
.flatMap(connection -> Mono.from(connection.createStatement(""" | ||
INSERT INTO %s.%s ( | ||
id, | ||
input, | ||
expected_output, | ||
metadata, | ||
source, | ||
dataset_id, | ||
workspace_id | ||
) VALUES (:id, :input, :expected_output, :metadata, :source, :dataset_id, :workspace_id) | ||
) VALUES (:id, :source, :dataset_id, :workspace_id) | ||
""".formatted(DATABASE_NAME, "dataset_items")) | ||
.bind("id", datasetItem.id()) | ||
.bind("input", datasetItem.input().toString()) | ||
.bind("expected_output", datasetItem.expectedOutput().toString()) | ||
.bind("metadata", datasetItem.metadata().toString()) | ||
Comment on lines
-4760
to
-4762
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably the whole |
||
.bind("source", DatasetItemSource.SDK.getValue()) | ||
.bind("dataset_id", datasetId) | ||
.bind("workspace_id", WORKSPACE_ID) | ||
|
@@ -4768,22 +4694,6 @@ void setUp() { | |
|
||
this.datasetItem = datasetItem; | ||
} | ||
|
||
@Test | ||
void findById__whenDatasetItemNotMigrated__thenReturnDatasetItemWithData() { | ||
|
||
var item = datasetItem.toBuilder() | ||
.spanId(null) | ||
.traceId(null) | ||
.experimentItems(null) | ||
.source(DatasetItemSource.SDK) | ||
.data(Map.of("input", datasetItem.input(), | ||
"expected_output", datasetItem.expectedOutput(), | ||
"metadata", datasetItem.metadata())) | ||
.build(); | ||
|
||
getItemAndAssert(item, TEST_WORKSPACE, API_KEY); | ||
} | ||
} | ||
|
||
private Set<Column> addDeprecatedFields(List<Map<String, JsonNode>> data) { | ||
|
@@ -4799,12 +4709,6 @@ private Set<Column> addDeprecatedFields(List<Map<String, JsonNode>> data) { | |
.build()) | ||
.collect(Collectors.toCollection(HashSet::new)); | ||
|
||
columns.add(Column.builder().name("input").types(Set.of(ColumnType.OBJECT)).filterFieldPrefix("data").build()); | ||
columns.add(Column.builder().name("expected_output").types(Set.of(ColumnType.OBJECT)).filterFieldPrefix("data") | ||
.build()); | ||
columns.add( | ||
Column.builder().name("metadata").types(Set.of(ColumnType.OBJECT)).filterFieldPrefix("data").build()); | ||
Comment on lines
-4802
to
-4806
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably the whole |
||
|
||
Map<String, Set<ColumnType>> results = columns.stream() | ||
.collect(groupingBy(Column::name, mapping(Column::types, flatMapping(Set::stream, toSet())))); | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe this whole class doesn't make sense anymore. You should completely remove it, along with the related annotation, and then simply add a
`@NotEmpty`
annotation to the `data` field of `DatasetItem`, which works with collections (null or not empty on the map). Please make sure there's a test covering that this works fine.