diff --git a/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java b/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
index eedde21397eb..400ee58218ae 100644
--- a/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
+++ b/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
@@ -156,6 +156,49 @@ private static boolean canContainDeletesForFile(
 
       case EQUALITY_DELETES:
         return canContainEqDeletesForFile(dataFile, deleteFile, schema);
+
+      case PARTIAL_UPDATE:
+        return canContainPartialUpdateDeletesForFile(dataFile, deleteFile, schema);
+    }
+
+    return true;
+  }
+
+  /**
+   * Checks whether a partial-update delete file may apply to a data file.
+   *
+   * <p>Uses the delete file's lower and upper bounds for the {@code file_path} metadata column:
+   * the data file can only match if its path is not below the lower bound and not above the
+   * upper bound. Missing bounds are treated as "may match".
+   *
+   * @param dataFile data file whose path is tested
+   * @param deleteFile partial-update file whose column bounds are inspected
+   * @param schema table schema; not read by the current placeholder implementation
+   * @return false if the bounds prove the data file's path is out of range, true otherwise
+   */
+  private static boolean canContainPartialUpdateDeletesForFile(
+      DataFile dataFile, DeleteFile deleteFile, Schema schema) {
+    // TODO: add actual implementation
+    // check that the delete file can contain the data file's file_path
+    Map<Integer, ByteBuffer> lowers = deleteFile.lowerBounds();
+    Map<Integer, ByteBuffer> uppers = deleteFile.upperBounds();
+    if (lowers == null || uppers == null) {
+      return true;
+    }
+
+    Type pathType = MetadataColumns.DELETE_FILE_PATH.type();
+    int pathId = MetadataColumns.DELETE_FILE_PATH.fieldId();
+    Comparator<CharSequence> comparator = Comparators.charSequences();
+    ByteBuffer lower = lowers.get(pathId);
+    if (lower != null
+        && comparator.compare(dataFile.path(), Conversions.fromByteBuffer(pathType, lower)) < 0) {
+      return false;
+    }
+
+    ByteBuffer upper = uppers.get(pathId);
+    if (upper != null
+        && comparator.compare(dataFile.path(), Conversions.fromByteBuffer(pathType, upper)) > 0) {
+      return false;
     }
 
     return true;
@@ -474,6 +517,28 @@ DeleteFileIndex build() {
           globalApplySeqs = eqFilesSortedBySeq.stream().mapToLong(Pair::first).toArray();
           globalDeletes = eqFilesSortedBySeq.stream().map(Pair::second).toArray(DeleteFile[]::new);
 
+          List<Pair<Long, DeleteFile>> partialDeleteSortedBySeq =
+              deleteFilesByPartition.get(partition).stream()
+                  .filter(entry -> entry.file().content() == FileContent.PARTIAL_UPDATE)
+                  .map(
+                      entry ->
+                          // a delete file is indexed by the sequence number it should be applied to
+                          Pair.of(entry.dataSequenceNumber(), entry.file()))
+                  .sorted(Comparator.comparingLong(Pair::first))
+                  .collect(Collectors.toList());
+          if (!partialDeleteSortedBySeq.isEmpty()) {
+            // keep the equality deletes indexed above: merge both kinds into one list ordered by
+            // apply sequence number instead of overwriting the global arrays
+            List<Pair<Long, DeleteFile>> globalFilesSortedBySeq =
+                java.util.stream.Stream.concat(
+                        eqFilesSortedBySeq.stream(), partialDeleteSortedBySeq.stream())
+                    .sorted(Comparator.comparingLong(Pair::first))
+                    .collect(Collectors.toList());
+            globalApplySeqs = globalFilesSortedBySeq.stream().mapToLong(Pair::first).toArray();
+            globalDeletes =
+                globalFilesSortedBySeq.stream().map(Pair::second).toArray(DeleteFile[]::new);
+          }
+
           List<Pair<Long, DeleteFile>> posFilesSortedBySeq =
               deleteFilesByPartition.get(partition).stream()
                   .filter(entry -> entry.file().content() == FileContent.POSITION_DELETES)
diff --git a/data/src/main/java/org/apache/iceberg/data/GenericAppenderFactory.java b/data/src/main/java/org/apache/iceberg/data/GenericAppenderFactory.java
index ad938be7245d..0f1315f3b117 100644
--- a/data/src/main/java/org/apache/iceberg/data/GenericAppenderFactory.java
+++ b/data/src/main/java/org/apache/iceberg/data/GenericAppenderFactory.java
@@ -195,7 +195,7 @@ public PartialDeleteWriter newPartialWriter(
 
         default:
           throw new UnsupportedOperationException(
-                "Cannot write equality-deletes for unsupported file format: " + format);
+              "Cannot write partial-updates for unsupported file format: " + format);
       }
     } catch (IOException e) {
       throw new UncheckedIOException(e);
diff --git a/deploy.gradle b/deploy.gradle
index 8e0d43fe02e4..4273af28c57e 100644
--- a/deploy.gradle
+++ b/deploy.gradle
@@ -81,7 +81,7 @@ subprojects {
             }
           }
 
-          groupId = 'org.apache.iceberg'
+          groupId = 'org.apache.iceberg-kn'
           pom {
             name = 'Apache Iceberg'
             description = 'A table format for huge analytic datasets'
@@ -109,18 +109,7 @@ subprojects {
       }
 
       repositories {
-        maven {
-          credentials {
-            username project.hasProperty('mavenUser') ? "$mavenUser" : ""
-            password project.hasProperty('mavenPassword') ? "$mavenPassword" : ""
-          }
-          // upload to the releases repository using ./gradlew -Prelease publish
-          def apacheSnapshotsRepoUrl = 'https://repository.apache.org/content/repositories/snapshots'
-          def apacheReleasesRepoUrl = 'https://repository.apache.org/service/local/staging/deploy/maven2'
-          def snapshotsRepoUrl = project.hasProperty('mavenSnapshotsRepo') ? "$mavenSnapshotsRepo" : "$apacheSnapshotsRepoUrl"
-          def releasesRepoUrl = project.hasProperty('mavenReleasesRepo') ? "$mavenReleasesRepo" : "$apacheReleasesRepoUrl"
-          url = project.hasProperty('release') ? releasesRepoUrl : snapshotsRepoUrl
-        }
+        mavenLocal()
       }
     }
 