From 29a80a917fcb60625107ebb278955624d5dc5463 Mon Sep 17 00:00:00 2001 From: Morgan Kleene Date: Mon, 20 May 2024 13:42:15 -0400 Subject: [PATCH] feat(sdk): update archive support (#47) Addresses https://github.com/opentdf/java-sdk/issues/27 and https://github.com/opentdf/java-sdk/issues/18 The API looks like: ```java byte[] zipFileBytes = new ZipWriter().file("file1", bytes).file("file 2", stream).build(); var reader = new ZipReader(fileChannel); for (var entry: reader.getEntries()) { System.out.println(entry.getName()); var input = entry.getData(); // .. then use the input stream to get the contents } ``` --------- Co-authored-by: Mikhail Ustiantsev <39340048+mustyantsev@users.noreply.github.com> --- sdk/pom.xml | 6 + .../io/opentdf/platform/sdk/ZipReader.java | 408 ++++++---- .../io/opentdf/platform/sdk/ZipWriter.java | 745 ++++++++++-------- .../opentdf/platform/sdk/ZipReaderTest.java | 160 +++- .../opentdf/platform/sdk/ZipWriterTest.java | 167 +++- 5 files changed, 914 insertions(+), 572 deletions(-) diff --git a/sdk/pom.xml b/sdk/pom.xml index ac179299..d69b0294 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -83,5 +83,11 @@ 4.13.1 test + + org.apache.commons + commons-compress + 1.26.1 + test + diff --git a/sdk/src/main/java/io/opentdf/platform/sdk/ZipReader.java b/sdk/src/main/java/io/opentdf/platform/sdk/ZipReader.java index ec5b9e68..cc9d8d1e 100644 --- a/sdk/src/main/java/io/opentdf/platform/sdk/ZipReader.java +++ b/sdk/src/main/java/io/opentdf/platform/sdk/ZipReader.java @@ -1,208 +1,290 @@ package io.opentdf.platform.sdk; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.channels.SeekableByteChannel; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; public class ZipReader { + + public static final Logger logger = LoggerFactory.getLogger(ZipReader.class); + public static final int END_OF_CENTRAL_DIRECTORY_SIZE = 22; + public static final int ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIZE = 20; + + final ByteBuffer longBuf = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.LITTLE_ENDIAN); + private Long readLong() throws IOException { + longBuf.clear(); + if (this.zipChannel.read(longBuf) != 8) { + return null; + } + longBuf.flip(); + return longBuf.getLong(); + } + + final ByteBuffer intBuf = ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN); + private Integer readInt() throws IOException { + intBuf.clear(); + if (this.zipChannel.read(intBuf) != 4) { + return null; + } + intBuf.flip(); + return intBuf.getInt(); + } + + final ByteBuffer shortBuf = ByteBuffer.allocate(Short.BYTES).order(ByteOrder.LITTLE_ENDIAN); + + private Short readShort() throws IOException { + shortBuf.clear(); + if (this.zipChannel.read(shortBuf) != 2) { + return null; + } + shortBuf.flip(); + return shortBuf.getShort(); + } + + private static class CentralDirectoryRecord { + final long numEntries; + final long offsetToStart; + + public CentralDirectoryRecord(long numEntries, long offsetToStart) { + this.numEntries = numEntries; + this.offsetToStart = offsetToStart; + } + } + + private static final int ZIP_64_END_OF_CENTRAL_DIRECTORY_SIGNATURE = 0x06064b50; private static final int END_OF_CENTRAL_DIRECTORY_SIGNATURE = 0x06054b50; - private static final int ZIP64_END_OF_CENTRAL_DIRECTORY_SIGNATURE = 0x06064b50; private static final int ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE = 0x07064b50; - private static final int CENTRAL_DIRECTORY_LOCATOR_SIGNATURE = 0x02014b50; - private static final int LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50; + private static final int CENTRAL_FILE_HEADER_SIGNATURE = 0x02014b50; + + private static final int LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50; private static final int ZIP64_MAGICVAL = 0xFFFFFFFF; private static final int ZIP64_EXTID= 0x0001; - private int numEntries; - private short fileNameLength; - private short extraFieldLength; - private long offsetToStartOfCentralDirectory; - private long relativeOffsetEndOfZip64EndOfCentralDirectory; - - public void readEndOfCentralDirectory(ByteBuffer buffer) throws Exception { - buffer.order(ByteOrder.LITTLE_ENDIAN); - long fileSize = buffer.capacity(); - long pointer = fileSize - 22; // 22 is the minimum size of the EOCDR - - // Search for the EOCDR from the end of the file - while (pointer >= 0) { - buffer.position((int)pointer); - int signature = buffer.getInt(); + CentralDirectoryRecord readEndOfCentralDirectory() throws IOException { + long eoCDRStart = zipChannel.size() - END_OF_CENTRAL_DIRECTORY_SIZE; // 22 is the minimum size of the EOCDR + + while (eoCDRStart >= 0) { + zipChannel.position(eoCDRStart); + int signature = readInt(); if (signature == END_OF_CENTRAL_DIRECTORY_SIGNATURE) { - System.out.println("Found End of Central Directory Record"); + if (logger.isDebugEnabled()) { + logger.debug("Found end of central directory signature at {}", zipChannel.position() - Integer.BYTES); + } break; } - pointer--; + eoCDRStart--; } - if (pointer < 0) { - throw new Exception("Invalid tdf file"); + if (eoCDRStart < 0) { + throw new RuntimeException("Didn't find the end of central directory"); } - // Read the EOCDR - short diskNumber = buffer.getShort(); - short centralDirectoryDiskNumber = buffer.getShort(); - short numEntriesThisDisk = buffer.getShort(); - numEntries = buffer.getShort(); - int centralDirectorySize = buffer.getInt(); - offsetToStartOfCentralDirectory = buffer.getInt(); - short commentLength = buffer.getShort(); + short diskNumber = readShort(); + short centralDirectoryDiskNumber = readShort(); + short numCDEntriesOnThisDisk = readShort(); + + int totalNumEntries = readShort(); + int sizeOfCentralDirectory = readInt(); + long offsetToStartOfCentralDirectory = readInt(); + short commentLength = readShort(); - // buffer's position at the start of the Central Directory - boolean isZip64 = false; if (offsetToStartOfCentralDirectory != ZIP64_MAGICVAL) { - //buffer.position((int)offsetToStartOfCentralDirectory); - } else { - isZip64 = true; - long index = fileSize - (22+ 20); // 22 is the size of the EOCDR and 20 is the size of the Zip64 EOCDR - buffer.position((int)index); - readZip64EndOfCentralDirectoryLocator(buffer); - index = fileSize - (22 + 20 + 56); // 56 is the size of the Zip64 EOCDR - buffer.position((int)index); - readZip64EndOfCentralDirectoryRecord(buffer); - //buffer.position((int)offsetToStartOfCentralDirectory); - } - // buffer.position(centralDirectoryOffset); + return new CentralDirectoryRecord(totalNumEntries, offsetToStartOfCentralDirectory); + } + + long zip64CentralDirectoryLocatorStart = zipChannel.size() - (ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIZE + END_OF_CENTRAL_DIRECTORY_SIZE + commentLength); + zipChannel.position(zip64CentralDirectoryLocatorStart); + return extractZIP64CentralDirectoryInfo(); } - private void readZip64EndOfCentralDirectoryLocator(ByteBuffer buffer) { - int signature = buffer.getInt() ; + private CentralDirectoryRecord extractZIP64CentralDirectoryInfo() throws IOException { + // buffer's position at the start of the Central Directory + int signature = readInt(); if (signature != ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE) { throw new RuntimeException("Invalid Zip64 End of Central Directory Record Signature"); } - int numberOfDiskWithZip64End = buffer.getInt(); - relativeOffsetEndOfZip64EndOfCentralDirectory = buffer.getLong(); - int totalNumberOfDisks = buffer.getInt(); - } - private void readZip64EndOfCentralDirectoryRecord(ByteBuffer buffer) { - int signature = buffer.getInt() ; - if (signature != ZIP64_END_OF_CENTRAL_DIRECTORY_SIGNATURE) { - throw new RuntimeException("Invalid Zip64 End of Central Directory Record "); - } - long sizeOfZip64EndOfCentralDirectoryRecord = buffer.getLong(); - short versionMadeBy = buffer.getShort(); - short versionNeededToExtract = buffer.getShort(); - int diskNumber = buffer.getInt(); - int diskWithCentralDirectory = buffer.getInt(); - long numEntriesOnThisDisk = buffer.getLong(); - numEntries = (int)buffer.getLong(); - long centralDirectorySize = buffer.getLong(); - offsetToStartOfCentralDirectory = buffer.getLong(); - } + int centralDirectoryDiskNumber = readInt(); + long offsetToEndOfCentralDirectory = readLong(); + int totalNumberOfDisks = readInt(); - public int getNumEntries() { - return numEntries; - } + zipChannel.position(offsetToEndOfCentralDirectory); + int sig = readInt(); + if (sig != ZIP_64_END_OF_CENTRAL_DIRECTORY_SIGNATURE) { + throw new RuntimeException("Invalid"); + } + long sizeOfEndOfCentralDirectoryRecord = readLong(); + short versionMadeBy = readShort(); + short versionNeeded = readShort(); + int thisDiskNumber = readInt(); + int cdDiskNumber = readInt(); + long numCDEntriesOnThisDisk = readLong(); + long totalNumCDEntries = readLong(); + long cdSize = readLong(); + long cdOffset = readLong(); - public short getFileNameLength() { - return fileNameLength; + return new CentralDirectoryRecord(totalNumCDEntries, cdOffset); } - public short getExtraFieldLength() { - return extraFieldLength; - } + public class Entry { + private final long fileSize; + private final String fileName; + final long offsetToLocalHeader; - public long getCDOffset() { - return offsetToStartOfCentralDirectory; - } + private Entry(byte[] fileName, long offsetToLocalHeader, long fileSize) { + this.fileName = new String(fileName, StandardCharsets.UTF_8); + this.offsetToLocalHeader = offsetToLocalHeader; + this.fileSize = fileSize; + } - public long readCentralDirectoryFileHeader(ByteBuffer buffer) { - System.out.println("Buffer position: " + buffer.position()); - int signature = buffer.getInt(); - if (signature != CENTRAL_DIRECTORY_LOCATOR_SIGNATURE) { - throw new RuntimeException("Invalid Central Directory File Header Signature"); + public String getName() { + return fileName; } - short versionMadeBy = buffer.getShort(); - short versionNeededToExtract = buffer.getShort(); - short generalPurposeBitFlag = buffer.getShort(); - short compressionMethod = buffer.getShort(); - short lastModFileTime = buffer.getShort(); - short lastModFileDate = buffer.getShort(); - int crc32 = buffer.getInt(); - int compressedSize = buffer.getInt(); - int uncompressedSize = buffer.getInt(); - fileNameLength = buffer.getShort(); - extraFieldLength = buffer.getShort(); - short fileCommentLength = buffer.getShort(); - short diskNumberStart = buffer.getShort(); - short internalFileAttributes = buffer.getShort(); - int externalFileAttributes = buffer.getInt(); - long relativeOffsetOfLocalHeader = buffer.getInt() ; - - byte[] fileName = new byte[fileNameLength]; - buffer.get(fileName); - String fileNameString = new String(fileName, StandardCharsets.UTF_8); -//// - if (compressedSize == ZIP64_MAGICVAL || uncompressedSize == ZIP64_MAGICVAL || relativeOffsetOfLocalHeader == ZIP64_MAGICVAL) { - // Parse the extra field - for (int i = 0; i < extraFieldLength; ) { - int headerId = buffer.getShort(); - int dataSize = buffer.getShort(); - i += 4; - - if (headerId == ZIP64_EXTID) { - if (compressedSize == ZIP64_MAGICVAL) { - compressedSize = (int)buffer.getLong(); - i += 8; + + public InputStream getData() throws IOException { + zipChannel.position(offsetToLocalHeader); + if (readInt() != LOCAL_FILE_HEADER_SIGNATURE) { + throw new RuntimeException("Invalid Local Header Signature"); + } + zipChannel.position(zipChannel.position() + + Short.BYTES + + Short.BYTES + + Short.BYTES + + Short.BYTES + + Short.BYTES + + Integer.BYTES); + + long compressedSize = readInt(); + long uncompressedSize = readInt(); + int filenameLength = readShort(); + int extrafieldLength = readShort(); + + final long startPosition = zipChannel.position() + filenameLength + extrafieldLength; + final long endPosition = startPosition + fileSize; + final ByteBuffer buf = ByteBuffer.allocate(1); + return new InputStream() { + long offset = 0; + @Override + public int read() throws IOException { + if (doneReading()) { + return -1; + } + setChannelPosition(); + while (buf.position() != buf.capacity()) { + if (zipChannel.read(buf) < 0) { + return -1; + } + } + offset += 1; + return buf.array()[0] & 0xFF; + } + + private boolean doneReading() { + return offset >= fileSize; + } + + private void setChannelPosition() throws IOException { + var nextPosition = startPosition + offset; + if (zipChannel.position() != nextPosition) { + zipChannel.position(nextPosition); } - if (uncompressedSize == ZIP64_MAGICVAL) { - uncompressedSize = (int)buffer.getLong(); - i += 8; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + if (doneReading()) { + return -1; } - if (relativeOffsetOfLocalHeader == ZIP64_MAGICVAL) { - relativeOffsetOfLocalHeader = buffer.getLong(); - i += 8; + setChannelPosition(); + var lenToRead = (int)Math.min(len, fileSize - offset); // cast is always valid because len is an int + var buf = ByteBuffer.wrap(b, off, lenToRead); + var nread = zipChannel.read(buf); + if (nread > 0) { + offset += nread; } - } else { - // Skip other extra fields - buffer.position(buffer.position() + dataSize); - i += dataSize; + return nread; + } + }; + } + } + public Entry readCentralDirectoryFileHeader() throws IOException { + int signature = readInt(); + if (signature != CENTRAL_FILE_HEADER_SIGNATURE) { + throw new RuntimeException("Invalid Central Directory File Header Signature"); + } + short versionMadeBy = readShort(); + short versionNeededToExtract = readShort(); + short generalPurposeBitFlag = readShort(); + short compressionMethod = readShort(); + short lastModFileTime = readShort(); + short lastModFileDate = readShort(); + int crc32 = readInt(); + long compressedSize = readInt(); + long uncompressedSize = readInt(); + int fileNameLength = readShort(); + int extraFieldLength = readShort(); + short fileCommentLength = readShort(); + int diskNumberStart = readShort(); + short internalFileAttributes = readShort(); + int externalFileAttributes = readInt(); + long relativeOffsetOfLocalHeader = readInt(); + + ByteBuffer fileName = ByteBuffer.allocate(fileNameLength); + while (fileName.position() != fileName.capacity()) { + zipChannel.read(fileName); + } + + // Parse the extra field + for (final long startPos = zipChannel.position(); zipChannel.position() < startPos + extraFieldLength; ) { + long fieldStart = zipChannel.position(); + int headerId = readShort(); + int dataSize = readShort(); + + if (headerId == ZIP64_EXTID) { + if (compressedSize == -1) { + compressedSize = readLong(); + } + if (uncompressedSize == -1) { + uncompressedSize = readLong(); + } + if (relativeOffsetOfLocalHeader == -1) { + relativeOffsetOfLocalHeader = readLong(); + } + if (diskNumberStart == ZIP64_MAGICVAL) { + diskNumberStart = readInt(); } } + // Skip other extra fields + zipChannel.position(fieldStart + dataSize + 4); } -//// - byte[] extraField = new byte[extraFieldLength]; - buffer.get(extraField); - byte[] fileComment = new byte[fileCommentLength]; - buffer.get(fileComment); - String fileCommentString = new String(fileComment, StandardCharsets.UTF_8); - return relativeOffsetOfLocalHeader; + zipChannel.position(zipChannel.position() + fileCommentLength); + + return new Entry(fileName.array(), relativeOffsetOfLocalHeader, uncompressedSize); + } + + + public ZipReader(SeekableByteChannel channel) throws IOException { + zipChannel = channel; + var centralDirectoryRecord = readEndOfCentralDirectory(); + zipChannel.position(centralDirectoryRecord.offsetToStart); + for (int i = 0; i < centralDirectoryRecord.numEntries; i++) { + entries.add(readCentralDirectoryFileHeader()); + } } + + final SeekableByteChannel zipChannel; + final ArrayList entries = new ArrayList<>(); - public void readLocalFileHeader(ByteBuffer buffer) { - int signature = buffer.getInt(); - if (signature != LOCAL_FILE_HEADER_SIGNATURE) { - throw new RuntimeException("Invalid Local File Header Signature"); - } - short versionNeededToExtract = buffer.getShort(); - short generalPurposeBitFlag = buffer.getShort(); - short compressionMethod = buffer.getShort(); - short lastModFileTime = buffer.getShort(); - short lastModFileDate = buffer.getShort(); - int crc32 = buffer.getInt(); - int compressedSize = buffer.getInt(); - int uncompressedSize = buffer.getInt(); - short fileNameLength = buffer.getShort(); - short extraFieldLength = buffer.getShort(); - - byte[] fileName = new byte[fileNameLength]; - buffer.get(fileName); - String fileNameString = new String(fileName, StandardCharsets.UTF_8); - System.out.println("File name: " + fileNameString); - - byte[] extraField = new byte[extraFieldLength]; - buffer.get(extraField); - - /*byte[] fileData = new byte[compressedSize]; - buffer.get(fileData); - - if (compressionMethod == 0) { - String fileContent = new String(fileData, StandardCharsets.UTF_8); - System.out.println("File content: " + fileContent); - } else { - System.out.println("File is compressed, need to decompress it first"); - }*/ + public List getEntries() { + return entries; } } \ No newline at end of file diff --git a/sdk/src/main/java/io/opentdf/platform/sdk/ZipWriter.java b/sdk/src/main/java/io/opentdf/platform/sdk/ZipWriter.java index 50395b9a..70af96f4 100644 --- a/sdk/src/main/java/io/opentdf/platform/sdk/ZipWriter.java +++ b/sdk/src/main/java/io/opentdf/platform/sdk/ZipWriter.java @@ -1,366 +1,293 @@ package io.opentdf.platform.sdk; -import java.io.FileOutputStream; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.time.LocalDateTime; -import java.time.ZoneOffset; import java.util.ArrayList; -import java.util.List; import java.util.zip.CRC32; public class ZipWriter { - private enum WriteState {Initial, Appending, Finished} - - private static final int ZIP_VERSION = 20; + private static final int ZIP_VERSION = 0x2D; private static final int ZIP_64_MAGIC_VAL = 0xFFFFFFFF; - private static final int ZIP_64_EXTENDED_LOCAL_INFO_EXTRA_FIELD_SIZE = 24; - private static final int ZIP_64_EXTENDED_INFO_EXTRA_FIELD_SIZE = 28; + private static final long ZIP_64_END_OF_CD_RECORD_SIZE = 56; + private static final int ZIP_64_LOCAL_EXTENDED_INFO_EXTRA_FIELD_SIZE = 24; + + private static final int ZIP_64_GLOBAL_EXTENDED_INFO_EXTRA_FIELD_SIZE = 28; private static final int ZIP_32_DATA_DESCRIPTOR_SIZE = 16; + + private static final int ZIP_64_DATA_DESCRIPTOR_SIZE = 24; private static final int HALF_SECOND = 2; private static final int BASE_YEAR = 1980; private static final int DEFAULT_SECOND_VALUE = 29; private static final int MONTH_SHIFT = 5; - private OutputStream writer; - private long currentOffset; - private long lastOffsetCDFileHeader; - private FileInfo fileInfo; - private List fileInfoEntries; - private WriteState writeState; - private boolean isZip64; - private long totalBytes; - - public ZipWriter(OutputStream writer) { - this.writer = writer; - this.currentOffset = 0; - this.lastOffsetCDFileHeader = 0; - this.fileInfo = new FileInfo(); - this.fileInfoEntries = new ArrayList<>(); - this.writeState = WriteState.Initial; - this.isZip64 = false; - this.totalBytes = 0; - } + private static class FileBytes { + public FileBytes(String name, byte[] data) { + this.name = name; + this.data = data; + } - public void enableZip64() { - this.isZip64 = true; + final String name; + final byte[] data; } - public void addHeader(String filename, long size) throws IOException { - if (filename == null || filename.isEmpty()) { - throw new IllegalArgumentException("Filename cannot be null or empty"); + private static class InputStream { + public InputStream(String name, java.io.InputStream data) { + this.name = name; + this.data = data; } - if (this.writeState != WriteState.Initial && this.writeState != WriteState.Finished) { - throw new IOException("Cannot add a new file until the current file write is completed: " + this.fileInfo.filename); - } - - this.fileInfo = new FileInfo(); - this.fileInfo.filename = filename; + final String name; + private final java.io.InputStream data; + } - if (!this.isZip64) { - this.isZip64 = size > 4L * 1024 * 1024 * 1024; // if file size is greater than 4GB - } + private final ArrayList byteFiles = new ArrayList<>(); + private final ArrayList streamFiles = new ArrayList<>(); - this.writeState = WriteState.Initial; - this.fileInfo.size = size; - this.fileInfo.filename = filename; + public ZipWriter file(String name, java.io.InputStream data) { + streamFiles.add(new InputStream(name, data)); + return this; } - public void addData(byte[] data) throws IOException { - long fileTime, fileDate; - fileTime = fileDate = getTimeDateUnMSDosFormat(); - - if (this.writeState == WriteState.Initial) { - LocalFileHeader localFileHeader = new LocalFileHeader(); - localFileHeader.signature = 0x04034b50; - localFileHeader.version = ZIP_VERSION; - localFileHeader.generalPurposeBitFlag = 0x08; - localFileHeader.compressionMethod = 0; - localFileHeader.lastModifiedTime = (int) fileTime; - localFileHeader.lastModifiedDate = (int) fileDate; - localFileHeader.crc32 = 0; - localFileHeader.compressedSize = 0; - localFileHeader.uncompressedSize = 0; - localFileHeader.extraFieldLength = 0; - - if (this.isZip64) { - localFileHeader.compressedSize = ZIP_64_MAGIC_VAL; - localFileHeader.uncompressedSize = ZIP_64_MAGIC_VAL; - localFileHeader.extraFieldLength = ZIP_64_EXTENDED_LOCAL_INFO_EXTRA_FIELD_SIZE; - } + public ZipWriter file(String name, byte[] content) { + byteFiles.add(new FileBytes(name, content)); + return this; + } - localFileHeader.filenameLength = (short) this.fileInfo.filename.length(); + public void build(OutputStream sink) throws IOException { + var out = new CountingOutputStream(sink); + ArrayList fileInfos = new ArrayList<>(); - // Write local file header - ByteBuffer buffer = ByteBuffer.allocate(30 + this.fileInfo.filename.length()); - buffer.order(ByteOrder.LITTLE_ENDIAN); - buffer.putInt(localFileHeader.signature); - buffer.putShort((short) localFileHeader.version); - buffer.putShort((short) localFileHeader.generalPurposeBitFlag); - buffer.putShort((short) localFileHeader.compressionMethod); - buffer.putShort((short) localFileHeader.lastModifiedTime); - buffer.putShort((short) localFileHeader.lastModifiedDate); - buffer.putInt(localFileHeader.crc32); - buffer.putInt(localFileHeader.compressedSize); - buffer.putInt(localFileHeader.uncompressedSize); - buffer.putShort(localFileHeader.filenameLength); - buffer.putShort(localFileHeader.extraFieldLength); - buffer.put(this.fileInfo.filename.getBytes(StandardCharsets.UTF_8)); - - this.writer.write(buffer.array()); - - if (this.isZip64) { - Zip64ExtendedLocalInfoExtraField zip64ExtendedLocalInfoExtraField = new Zip64ExtendedLocalInfoExtraField(); - zip64ExtendedLocalInfoExtraField.signature = 0x0001; - zip64ExtendedLocalInfoExtraField.size = ZIP_64_EXTENDED_LOCAL_INFO_EXTRA_FIELD_SIZE - 4; - zip64ExtendedLocalInfoExtraField.originalSize = this.fileInfo.size; - zip64ExtendedLocalInfoExtraField.compressedSize = this.fileInfo.size; - - buffer = ByteBuffer.allocate(ZIP_64_EXTENDED_LOCAL_INFO_EXTRA_FIELD_SIZE); - buffer.order(ByteOrder.LITTLE_ENDIAN); - buffer.putShort((short) zip64ExtendedLocalInfoExtraField.signature); - buffer.putShort((short) zip64ExtendedLocalInfoExtraField.size); - buffer.putLong(zip64ExtendedLocalInfoExtraField.originalSize); - buffer.putLong(zip64ExtendedLocalInfoExtraField.compressedSize); - - this.writer.write(buffer.array()); - } + for (var byteFile : byteFiles) { + var fileInfo = writeByteArray(byteFile.name, byteFile.data, out); + fileInfos.add(fileInfo); + } - this.writeState = WriteState.Appending; - this.fileInfo.crc = new CRC32().getValue(); - this.fileInfo.fileTime = (short) fileTime; - this.fileInfo.fileDate = (short) fileDate; + for (var streamFile : streamFiles) { + var fileInfo = writeStream(streamFile.name, streamFile.data, out); + fileInfos.add(fileInfo); } - // Write the data contents - this.writer.write(data); + final var startOfCentralDirectory = out.position; + for (var fileInfo : fileInfos) { + writeCentralDirectoryHeader(fileInfo, out); + } - // Update CRC32 - CRC32 crc32 = new CRC32(); - crc32.update(data); - this.fileInfo.crc = crc32.getValue(); + final var sizeOfCentralDirectory = out.position - startOfCentralDirectory; + writeEndOfCentralDirectory(!streamFiles.isEmpty(), fileInfos.size(), startOfCentralDirectory, sizeOfCentralDirectory, out); + } - // Update file size - this.fileInfo.offset += data.length; + public byte[] build() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + build(out); + return out.toByteArray(); + } - // Check if we reached the end - if (this.fileInfo.offset >= this.fileInfo.size) { - this.writeState = WriteState.Finished; - this.fileInfo.offset = this.currentOffset; - this.fileInfo.flag = 0x08; - this.fileInfoEntries.add(this.fileInfo); + private static void writeCentralDirectoryHeader(FileInfo fileInfo, OutputStream out) throws IOException { + CDFileHeader cdFileHeader = new CDFileHeader(); + cdFileHeader.generalPurposeBitFlag = fileInfo.flag; + cdFileHeader.lastModifiedTime = fileInfo.fileTime; + cdFileHeader.lastModifiedDate = fileInfo.fileDate; + cdFileHeader.crc32 = (int) fileInfo.crc; + cdFileHeader.filenameLength = (short) fileInfo.filename.length(); + cdFileHeader.extraFieldLength = 0; + cdFileHeader.compressedSize = (int) fileInfo.size; + cdFileHeader.uncompressedSize = (int) fileInfo.size; + cdFileHeader.localHeaderOffset = (int) fileInfo.offset; + + if (fileInfo.isZip64) { + cdFileHeader.compressedSize = ZIP_64_MAGIC_VAL; + cdFileHeader.uncompressedSize = ZIP_64_MAGIC_VAL; + cdFileHeader.localHeaderOffset = ZIP_64_MAGIC_VAL; + cdFileHeader.extraFieldLength = ZIP_64_GLOBAL_EXTENDED_INFO_EXTRA_FIELD_SIZE; } - if (this.writeState == WriteState.Finished) { - if (this.isZip64) { - // Write Zip64 data descriptor - Zip64DataDescriptor zip64DataDescriptor = new Zip64DataDescriptor(); - zip64DataDescriptor.signature = 0x08074b50; - zip64DataDescriptor.crc32 = this.fileInfo.crc; - zip64DataDescriptor.compressedSize = this.fileInfo.size; - zip64DataDescriptor.uncompressedSize = this.fileInfo.size; - - ByteBuffer buffer = ByteBuffer.allocate(ZIP_32_DATA_DESCRIPTOR_SIZE); - buffer.order(ByteOrder.LITTLE_ENDIAN); - buffer.putInt(zip64DataDescriptor.signature); - buffer.putInt((int) zip64DataDescriptor.crc32); - buffer.putInt((int) zip64DataDescriptor.compressedSize); - buffer.putInt((int) zip64DataDescriptor.uncompressedSize); - - this.writer.write(buffer.array()); - - this.currentOffset += 30 + this.fileInfo.filename.length() + this.fileInfo.size + ZIP_64_EXTENDED_LOCAL_INFO_EXTRA_FIELD_SIZE + ZIP_32_DATA_DESCRIPTOR_SIZE; - } else { - // Write Zip32 data descriptor - Zip32DataDescriptor zip32DataDescriptor = new Zip32DataDescriptor(); - zip32DataDescriptor.signature = 0x08074b50; - zip32DataDescriptor.crc32 = this.fileInfo.crc; - zip32DataDescriptor.compressedSize = (int) this.fileInfo.size; - zip32DataDescriptor.uncompressedSize = (int) this.fileInfo.size; - - ByteBuffer buffer = ByteBuffer.allocate(ZIP_32_DATA_DESCRIPTOR_SIZE); - buffer.order(ByteOrder.LITTLE_ENDIAN); - buffer.putInt(zip32DataDescriptor.signature); - buffer.putInt((int) zip32DataDescriptor.crc32); - buffer.putInt(zip32DataDescriptor.compressedSize); - buffer.putInt(zip32DataDescriptor.uncompressedSize); - - this.writer.write(buffer.array()); - - this.currentOffset += 30 + this.fileInfo.filename.length() + this.fileInfo.size + ZIP_32_DATA_DESCRIPTOR_SIZE; - } + cdFileHeader.write(out, fileInfo.filename.getBytes(StandardCharsets.UTF_8)); - this.fileInfo = new FileInfo(); + if (fileInfo.isZip64) { + Zip64GlobalExtendedInfoExtraField zip64ExtendedInfoExtraField = new Zip64GlobalExtendedInfoExtraField(); + zip64ExtendedInfoExtraField.originalSize = fileInfo.size; + zip64ExtendedInfoExtraField.compressedSize = fileInfo.size; + zip64ExtendedInfoExtraField.localFileHeaderOffset = fileInfo.offset; + zip64ExtendedInfoExtraField.write(out); } } - public void finish() throws IOException { - writeCentralDirectory(); - writeEndOfCentralDirectory(); - } + private FileInfo writeStream(String name, java.io.InputStream data, CountingOutputStream out) throws IOException { + var startPosition = out.position; + long fileTime, fileDate; + fileTime = fileDate = getTimeDateUnMSDosFormat(); - private void writeCentralDirectory() throws IOException { - this.lastOffsetCDFileHeader = this.currentOffset; - - for (FileInfo fileInfo : this.fileInfoEntries) { - CDFileHeader cdFileHeader = new CDFileHeader(); - cdFileHeader.signature = 0x02014b50; - cdFileHeader.versionCreated = ZIP_VERSION; - cdFileHeader.versionNeeded = ZIP_VERSION; - cdFileHeader.generalPurposeBitFlag = fileInfo.flag; - cdFileHeader.compressionMethod = 0; - cdFileHeader.lastModifiedTime = fileInfo.fileTime; - cdFileHeader.lastModifiedDate = fileInfo.fileDate; - cdFileHeader.crc32 = (int) fileInfo.crc; - cdFileHeader.filenameLength = (short) fileInfo.filename.length(); - cdFileHeader.fileCommentLength = 0; - cdFileHeader.diskNumberStart = 0; - cdFileHeader.internalFileAttributes = 0; - cdFileHeader.externalFileAttributes = 0; - cdFileHeader.compressedSize = (int) fileInfo.size; - cdFileHeader.uncompressedSize = (int) fileInfo.size; - cdFileHeader.localHeaderOffset = (int) fileInfo.offset; - cdFileHeader.extraFieldLength = 0; - - if (this.isZip64) { - cdFileHeader.compressedSize = ZIP_64_MAGIC_VAL; - cdFileHeader.uncompressedSize = ZIP_64_MAGIC_VAL; - cdFileHeader.localHeaderOffset = ZIP_64_MAGIC_VAL; - cdFileHeader.extraFieldLength = ZIP_64_EXTENDED_INFO_EXTRA_FIELD_SIZE; + var nameBytes = name.getBytes(StandardCharsets.UTF_8); + LocalFileHeader localFileHeader = new LocalFileHeader(); + localFileHeader.lastModifiedTime = (int) fileTime; + localFileHeader.lastModifiedDate = (int) fileDate; + localFileHeader.filenameLength = (short) nameBytes.length; + localFileHeader.crc32 = 0; + localFileHeader.generalPurposeBitFlag = (1 << 3) | (1 << 11); // we are using the data descriptor and we are using UTF-8 + localFileHeader.compressedSize = ZIP_64_MAGIC_VAL; + localFileHeader.uncompressedSize = ZIP_64_MAGIC_VAL; + localFileHeader.extraFieldLength = 0; + + localFileHeader.write(out, nameBytes); + + var crc = new CRC32(); + var outputStream = new OutputStream() { + @Override + public void write(int b) throws IOException { + crc.update(b); + out.write(b); } - ByteBuffer buffer = ByteBuffer.allocate(46 + fileInfo.filename.length()); - buffer.order(ByteOrder.LITTLE_ENDIAN); - buffer.putInt(cdFileHeader.signature); - buffer.putShort((short) cdFileHeader.versionCreated); - buffer.putShort((short) cdFileHeader.versionNeeded); - buffer.putShort((short) cdFileHeader.generalPurposeBitFlag); - buffer.putShort((short) cdFileHeader.compressionMethod); - buffer.putShort((short) cdFileHeader.lastModifiedTime); - buffer.putShort((short) cdFileHeader.lastModifiedDate); - buffer.putInt((int) cdFileHeader.crc32); - buffer.putInt(cdFileHeader.compressedSize); - buffer.putInt(cdFileHeader.uncompressedSize); - buffer.putShort(cdFileHeader.filenameLength); - buffer.putShort(cdFileHeader.fileCommentLength); - buffer.putShort(cdFileHeader.diskNumberStart); - buffer.putShort(cdFileHeader.internalFileAttributes); - buffer.putInt(cdFileHeader.externalFileAttributes); - buffer.putInt(cdFileHeader.localHeaderOffset); - buffer.putShort(cdFileHeader.extraFieldLength); - buffer.put(fileInfo.filename.getBytes(StandardCharsets.UTF_8)); - - this.writer.write(buffer.array()); - - if (this.isZip64) { - Zip64ExtendedInfoExtraField zip64ExtendedInfoExtraField = new Zip64ExtendedInfoExtraField(); - zip64ExtendedInfoExtraField.signature = 0x0001; - zip64ExtendedInfoExtraField.size = ZIP_64_EXTENDED_INFO_EXTRA_FIELD_SIZE - 4; - zip64ExtendedInfoExtraField.originalSize = fileInfo.size; - zip64ExtendedInfoExtraField.compressedSize = fileInfo.size; - zip64ExtendedInfoExtraField.localFileHeaderOffset = fileInfo.offset; - - buffer = ByteBuffer.allocate(ZIP_64_EXTENDED_INFO_EXTRA_FIELD_SIZE); - buffer.order(ByteOrder.LITTLE_ENDIAN); - buffer.putShort((short) zip64ExtendedInfoExtraField.signature); - buffer.putShort((short) zip64ExtendedInfoExtraField.size); - buffer.putLong(zip64ExtendedInfoExtraField.originalSize); - buffer.putLong(zip64ExtendedInfoExtraField.compressedSize); - buffer.putLong(zip64ExtendedInfoExtraField.localFileHeaderOffset); - - this.writer.write(buffer.array()); + @Override + public void write(byte[] b) throws IOException { + crc.update(b); + out.write(b); } - this.lastOffsetCDFileHeader += 46 + fileInfo.filename.length(); - - if (this.isZip64) { - this.lastOffsetCDFileHeader += ZIP_64_EXTENDED_INFO_EXTRA_FIELD_SIZE; + @Override + public void write(byte[] b, int off, int len) throws IOException { + crc.update(b, off, len); + out.write(b, off, len); } - } + }; + + long fileStart = out.position; + data.transferTo(outputStream); + long fileSize = out.position - fileStart; + long crcValue = crc.getValue(); + + // Write Zip64 data descriptor + Zip64DataDescriptor dataDescriptor = new Zip64DataDescriptor(); + dataDescriptor.crc32 = crcValue; + dataDescriptor.compressedSize = fileSize; + dataDescriptor.uncompressedSize = fileSize; + dataDescriptor.write(out); + + var fileInfo = new FileInfo(); + fileInfo.offset = startPosition; + fileInfo.flag = (short) localFileHeader.generalPurposeBitFlag; + fileInfo.size = fileSize; + fileInfo.crc = crcValue; + fileInfo.filename = name; + fileInfo.fileTime = (short) fileTime; + fileInfo.fileDate = (short) fileDate; + fileInfo.isZip64 = true; + + return fileInfo; } - private void writeEndOfCentralDirectory() throws IOException { - if (this.isZip64) { - writeZip64EndOfCentralDirectory(); - writeZip64EndOfCentralDirectoryLocator(); + private FileInfo writeByteArray(String name, byte[] data, CountingOutputStream out) throws IOException { + var startPosition = out.position; + long fileTime, fileDate; + fileTime = fileDate = getTimeDateUnMSDosFormat(); + + var crc = new CRC32(); + crc.update(data); + var crcValue = crc.getValue(); + + var nameBytes = name.getBytes(StandardCharsets.UTF_8); + LocalFileHeader localFileHeader = new LocalFileHeader(); + localFileHeader.lastModifiedTime = (int) fileTime; + localFileHeader.lastModifiedDate = (int) fileDate; + localFileHeader.filenameLength = (short) nameBytes.length; + localFileHeader.generalPurposeBitFlag = 0; + localFileHeader.crc32 = (int) crcValue; + localFileHeader.compressedSize = data.length; + localFileHeader.uncompressedSize = data.length; + localFileHeader.extraFieldLength = 0; + + localFileHeader.write(out, name.getBytes(StandardCharsets.UTF_8)); + + out.write(data); + + var fileInfo = new FileInfo(); + fileInfo.offset = startPosition; + fileInfo.flag = (1 << 11); + fileInfo.size = data.length; + fileInfo.crc = crcValue; + fileInfo.filename = name; + fileInfo.fileTime = (short) fileTime; + fileInfo.fileDate = (short) fileDate; + fileInfo.isZip64 = false; + + return fileInfo; + } + + + private void writeEndOfCentralDirectory(boolean hasZip64Entry, long numEntries, long startOfCentralDirectory, long sizeOfCentralDirectory, CountingOutputStream out) throws IOException { + var isZip64 = hasZip64Entry + || (numEntries & ~0xFF) != 0 + || (startOfCentralDirectory & ~0xFFFF) != 0 + || (sizeOfCentralDirectory & ~0xFFFF) != 0; + + if (isZip64) { + var endPosition = out.position; + writeZip64EndOfCentralDirectory(numEntries, startOfCentralDirectory, sizeOfCentralDirectory, out); + writeZip64EndOfCentralDirectoryLocator(endPosition, out); } EndOfCDRecord endOfCDRecord = new EndOfCDRecord(); - endOfCDRecord.signature = 0x06054b50; - endOfCDRecord.diskNumber = 0; - endOfCDRecord.startDiskNumber = 0; - endOfCDRecord.numberOfCDRecordEntries = (short) this.fileInfoEntries.size(); - endOfCDRecord.totalCDRecordEntries = (short) this.fileInfoEntries.size(); - endOfCDRecord.centralDirectoryOffset = (int) this.currentOffset; - endOfCDRecord.sizeOfCentralDirectory = (int) (this.lastOffsetCDFileHeader - this.currentOffset); - endOfCDRecord.commentLength = 0; - - ByteBuffer buffer = ByteBuffer.allocate(22); - buffer.order(ByteOrder.LITTLE_ENDIAN); - buffer.putInt(endOfCDRecord.signature); - buffer.putShort(endOfCDRecord.diskNumber); - buffer.putShort(endOfCDRecord.startDiskNumber); - buffer.putShort(endOfCDRecord.numberOfCDRecordEntries); - buffer.putShort(endOfCDRecord.totalCDRecordEntries); - buffer.putInt(endOfCDRecord.sizeOfCentralDirectory); - buffer.putInt(endOfCDRecord.centralDirectoryOffset); - buffer.putShort(endOfCDRecord.commentLength); - - this.writer.write(buffer.array()); + endOfCDRecord.numberOfCDRecordEntries = isZip64 ? ZIP_64_MAGIC_VAL : (short) numEntries; + endOfCDRecord.totalCDRecordEntries = isZip64 ? ZIP_64_MAGIC_VAL : (short) numEntries; + endOfCDRecord.centralDirectoryOffset = isZip64 ? ZIP_64_MAGIC_VAL : (int) startOfCentralDirectory; + endOfCDRecord.sizeOfCentralDirectory = isZip64 ? ZIP_64_MAGIC_VAL : (int) sizeOfCentralDirectory; + + endOfCDRecord.write(out); } - private void writeZip64EndOfCentralDirectory() throws IOException { + private void writeZip64EndOfCentralDirectory(long numEntries, long startOfCentralDirectory, long sizeOfCentralDirectory, OutputStream out) throws IOException { Zip64EndOfCDRecord zip64EndOfCDRecord = new Zip64EndOfCDRecord(); - zip64EndOfCDRecord.signature = 0x06064b50; - zip64EndOfCDRecord.recordSize = ZIP_64_EXTENDED_INFO_EXTRA_FIELD_SIZE - 12; - zip64EndOfCDRecord.versionMadeBy = ZIP_VERSION; - zip64EndOfCDRecord.versionToExtract = ZIP_VERSION; - zip64EndOfCDRecord.diskNumber = 0; - zip64EndOfCDRecord.startDiskNumber = 0; - zip64EndOfCDRecord.numberOfCDRecordEntries = this.fileInfoEntries.size(); - zip64EndOfCDRecord.totalCDRecordEntries = this.fileInfoEntries.size(); - zip64EndOfCDRecord.centralDirectorySize = this.lastOffsetCDFileHeader - this.currentOffset; - zip64EndOfCDRecord.startingDiskCentralDirectoryOffset = this.currentOffset; - - ByteBuffer buffer = ByteBuffer.allocate(56); - buffer.order(ByteOrder.LITTLE_ENDIAN); - buffer.putInt(zip64EndOfCDRecord.signature); - buffer.putLong(zip64EndOfCDRecord.recordSize); - buffer.putShort(zip64EndOfCDRecord.versionMadeBy); - buffer.putShort(zip64EndOfCDRecord.versionToExtract); - buffer.putInt(zip64EndOfCDRecord.diskNumber); - buffer.putInt(zip64EndOfCDRecord.startDiskNumber); - buffer.putLong(zip64EndOfCDRecord.numberOfCDRecordEntries); - buffer.putLong(zip64EndOfCDRecord.totalCDRecordEntries); - buffer.putLong(zip64EndOfCDRecord.centralDirectorySize); - buffer.putLong(zip64EndOfCDRecord.startingDiskCentralDirectoryOffset); - - this.writer.write(buffer.array()); + zip64EndOfCDRecord.numberOfCDRecordEntries = numEntries; + zip64EndOfCDRecord.totalCDRecordEntries = numEntries; + zip64EndOfCDRecord.centralDirectorySize = sizeOfCentralDirectory; + zip64EndOfCDRecord.startingDiskCentralDirectoryOffset = startOfCentralDirectory; + + zip64EndOfCDRecord.write(out); } - private void writeZip64EndOfCentralDirectoryLocator() throws IOException { + private void writeZip64EndOfCentralDirectoryLocator(long startOfEndOfCD, OutputStream out) throws IOException { Zip64EndOfCDRecordLocator zip64EndOfCDRecordLocator = new Zip64EndOfCDRecordLocator(); - zip64EndOfCDRecordLocator.signature = 0x07064b50; - zip64EndOfCDRecordLocator.CDStartDiskNumber = 0; - zip64EndOfCDRecordLocator.CDOffset = this.lastOffsetCDFileHeader; - zip64EndOfCDRecordLocator.numberOfDisks = 1; - - ByteBuffer buffer = ByteBuffer.allocate(20); - buffer.order(ByteOrder.LITTLE_ENDIAN); - buffer.putInt(zip64EndOfCDRecordLocator.signature); - buffer.putInt(zip64EndOfCDRecordLocator.CDStartDiskNumber); - buffer.putLong(zip64EndOfCDRecordLocator.CDOffset); - buffer.putInt(zip64EndOfCDRecordLocator.numberOfDisks); - - this.writer.write(buffer.array()); + zip64EndOfCDRecordLocator.CDOffset = startOfEndOfCD; + + zip64EndOfCDRecordLocator.write(out); + } + + private static class CountingOutputStream extends OutputStream { + + private final OutputStream inner; + private long position; + + public CountingOutputStream(OutputStream inner) { + this.inner = inner; + this.position = 0; + } + + @Override + public void write(int b) throws IOException { + inner.write(b); + position += 1; + } + + @Override + public void write(byte[] b) throws IOException { + inner.write(b); + position += b.length; + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + inner.write(b, off, len); + position += len; + } } - private long getTimeDateUnMSDosFormat() { + private static long getTimeDateUnMSDosFormat() { LocalDateTime now = LocalDateTime.now(); int timeInDos = now.getHour() << 11 | now.getMinute() << 5 | Math.max(now.getSecond() / HALF_SECOND, DEFAULT_SECOND_VALUE); int dateInDos = (now.getYear() - BASE_YEAR) << 9 | ((now.getMonthValue() + 1) << MONTH_SHIFT) | now.getDayOfMonth(); @@ -368,97 +295,238 @@ private long getTimeDateUnMSDosFormat() { } private static class LocalFileHeader { - int signature; - int version; + final int signature = 0x04034b50; + final int version = ZIP_VERSION; int generalPurposeBitFlag; - int compressionMethod; + final int compressionMethod = 0; int lastModifiedTime; int lastModifiedDate; int crc32; int compressedSize; int uncompressedSize; + short filenameLength; - short extraFieldLength; - } + short extraFieldLength = 0; - private static class Zip64ExtendedLocalInfoExtraField { - short signature; - short size; - long originalSize; - long compressedSize; + void write(OutputStream out, byte[] filename) throws IOException { + ByteBuffer buffer = ByteBuffer.allocate(30 + filename.length); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(signature); + buffer.putShort((short) version); + buffer.putShort((short) generalPurposeBitFlag); + buffer.putShort((short) compressionMethod); + buffer.putShort((short) lastModifiedTime); + buffer.putShort((short) lastModifiedDate); + buffer.putInt(crc32); + buffer.putInt(compressedSize); + buffer.putInt(uncompressedSize); + buffer.putShort(filenameLength); + buffer.putShort(extraFieldLength); + buffer.put(filename); + + out.write(buffer.array()); + assert buffer.position() == buffer.capacity(); + } } private static class Zip64DataDescriptor { - int signature; + final int signature = 0x08074b50; long crc32; long compressedSize; long uncompressedSize; + + void write(OutputStream out) throws IOException { + ByteBuffer buffer = ByteBuffer.allocate(ZIP_64_DATA_DESCRIPTOR_SIZE); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(signature); + buffer.putInt((int) crc32); + buffer.putLong(compressedSize); + buffer.putLong(uncompressedSize); + + out.write(buffer.array()); + assert buffer.position() == buffer.capacity(); + } } private static class Zip32DataDescriptor { - int signature; + final int signature = 0x08074b50; + ; long crc32; int compressedSize; int uncompressedSize; + + void write(OutputStream out) throws IOException { + ByteBuffer buffer = ByteBuffer.allocate(ZIP_32_DATA_DESCRIPTOR_SIZE); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(signature); + buffer.putInt((int) crc32); + buffer.putInt(compressedSize); + buffer.putInt(uncompressedSize); + out.write(buffer.array()); + assert buffer.position() == buffer.capacity(); + } } private static class CDFileHeader { - int signature; - int versionCreated; - int versionNeeded; + final int signature = 0x02014b50; + final short versionCreated = ZIP_VERSION; + final short versionNeeded = ZIP_VERSION; int generalPurposeBitFlag; - int compressionMethod; + final int compressionMethod = 0; int lastModifiedTime; int lastModifiedDate; int crc32; int compressedSize; int uncompressedSize; short filenameLength; - short fileCommentLength; - short diskNumberStart; - short internalFileAttributes; - int externalFileAttributes; - int localHeaderOffset; short extraFieldLength; + final short fileCommentLength = 0; + final short diskNumberStart = 0; + final short internalFileAttributes = 0; + final int externalFileAttributes = 0; + int localHeaderOffset; + + void write(OutputStream out, byte[] filename) throws IOException { + ByteBuffer buffer = ByteBuffer.allocate(46 + filename.length); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(signature); + buffer.putShort(versionCreated); + buffer.putShort(versionNeeded); + buffer.putShort((short) generalPurposeBitFlag); + buffer.putShort((short) compressionMethod); + buffer.putShort((short) lastModifiedTime); + buffer.putShort((short) lastModifiedDate); + buffer.putInt(crc32); + buffer.putInt(compressedSize); + buffer.putInt(uncompressedSize); + buffer.putShort((short) filename.length); + buffer.putShort(extraFieldLength); + buffer.putShort(fileCommentLength); + buffer.putShort(diskNumberStart); + buffer.putShort(internalFileAttributes); + buffer.putInt(externalFileAttributes); + buffer.putInt(localHeaderOffset); + buffer.put(filename); + + out.write(buffer.array()); + assert buffer.position() == buffer.capacity(); + } } - private static class Zip64ExtendedInfoExtraField { - short signature; - short size; + private static class Zip64LocalExtendedInfoExtraField { + final short signature = 0x0001; + final short size = ZIP_64_LOCAL_EXTENDED_INFO_EXTRA_FIELD_SIZE; + long originalSize; + long compressedSize; + + void write(OutputStream out) throws IOException { + var buffer = ByteBuffer.allocate(ZIP_64_LOCAL_EXTENDED_INFO_EXTRA_FIELD_SIZE); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putShort(signature); + buffer.putShort(size); + buffer.putLong(originalSize); + buffer.putLong(compressedSize); + + out.write(buffer.array()); + assert buffer.position() == buffer.capacity(); + } + } + + private static class Zip64GlobalExtendedInfoExtraField { + final short signature = 0x0001; + final short size = ZIP_64_GLOBAL_EXTENDED_INFO_EXTRA_FIELD_SIZE - 4; long originalSize; long compressedSize; long localFileHeaderOffset; + + void write(OutputStream out) throws IOException { + var buffer = ByteBuffer.allocate(ZIP_64_GLOBAL_EXTENDED_INFO_EXTRA_FIELD_SIZE); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putShort(signature); + buffer.putShort(size); + buffer.putLong(compressedSize); + buffer.putLong(originalSize); + buffer.putLong(localFileHeaderOffset); + + out.write(buffer.array()); + assert buffer.position() == buffer.capacity(); + } } private static class EndOfCDRecord { - int signature; - short diskNumber; - short startDiskNumber; + final int signature = 0x06054b50; + final short diskNumber = 0; + final short startDiskNumber = 0; short numberOfCDRecordEntries; short totalCDRecordEntries; int sizeOfCentralDirectory; int centralDirectoryOffset; - short commentLength; + final short commentLength = 0; + + void write(OutputStream out) throws IOException { + ByteBuffer buffer = ByteBuffer.allocate(22); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(signature); + buffer.putShort(diskNumber); + buffer.putShort(startDiskNumber); + buffer.putShort(numberOfCDRecordEntries); + buffer.putShort(totalCDRecordEntries); + buffer.putInt(sizeOfCentralDirectory); + buffer.putInt(centralDirectoryOffset); + buffer.putShort(commentLength); + + out.write(buffer.array()); + assert buffer.position() == buffer.capacity(); + } } private static class Zip64EndOfCDRecord { - int signature; - long recordSize; - short versionMadeBy; - short versionToExtract; - int diskNumber; - int startDiskNumber; + final int signature = 0x06064b50; + final long recordSize = ZIP_64_END_OF_CD_RECORD_SIZE - 12; + final short versionMadeBy = ZIP_VERSION; + final short versionToExtract = ZIP_VERSION; + final int diskNumber = 0; + final int startDiskNumber = 0; long numberOfCDRecordEntries; long totalCDRecordEntries; long centralDirectorySize; long startingDiskCentralDirectoryOffset; + + void write(OutputStream out) throws IOException { + ByteBuffer buffer = ByteBuffer.allocate(56); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(signature); + buffer.putLong(recordSize); + buffer.putShort(versionMadeBy); + buffer.putShort(versionToExtract); + buffer.putInt(diskNumber); + buffer.putInt(startDiskNumber); + buffer.putLong(numberOfCDRecordEntries); + buffer.putLong(totalCDRecordEntries); + buffer.putLong(centralDirectorySize); + buffer.putLong(startingDiskCentralDirectoryOffset); + + out.write(buffer.array()); + } } + private static class Zip64EndOfCDRecordLocator { - int signature; - int CDStartDiskNumber; + final int signature = 0x07064b50; + final int CDStartDiskNumber = 0; long CDOffset; - int numberOfDisks; + final int numberOfDisks = 1; + + void write(OutputStream out) throws IOException { + ByteBuffer buffer = ByteBuffer.allocate(20); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(signature); + buffer.putInt(CDStartDiskNumber); + buffer.putLong(CDOffset); + buffer.putInt(numberOfDisks); + out.write(buffer.array()); + assert buffer.position() == buffer.capacity(); + } } private static class FileInfo { @@ -469,5 +537,6 @@ private static class FileInfo { short fileTime; short fileDate; short flag; + boolean isZip64; } } \ No newline at end of file diff --git a/sdk/src/test/java/io/opentdf/platform/sdk/ZipReaderTest.java b/sdk/src/test/java/io/opentdf/platform/sdk/ZipReaderTest.java index 83ae6add..5d5311fa 100644 --- a/sdk/src/test/java/io/opentdf/platform/sdk/ZipReaderTest.java +++ b/sdk/src/test/java/io/opentdf/platform/sdk/ZipReaderTest.java @@ -1,63 +1,137 @@ package io.opentdf.platform.sdk; -import org.junit.jupiter.api.BeforeEach; +import com.google.gson.Gson; +import org.apache.commons.compress.archivers.zip.Zip64Mode; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; +import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; import org.junit.jupiter.api.Test; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.io.RandomAccessFile; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.nio.file.Files; -import java.nio.file.Paths; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import java.util.Random; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertNotNull; public class ZipReaderTest { - private ZipReader zipReader; - private ByteBuffer buffer; - private RandomAccessFile raf; - private FileChannel fileChannel; + @Test + public void testReadingExistingZip() throws Exception { + try (RandomAccessFile raf = new RandomAccessFile("src/test/resources/sample.txt.tdf", "r")) { + var fileChannel = raf.getChannel(); + var zipReader = new ZipReader(fileChannel); + var entries = zipReader.getEntries(); + assertThat(entries.size()).isEqualTo(2); + for (var entry: entries) { + var stream = new ByteArrayOutputStream(); + if (entry.getName().endsWith(".json")) { + entry.getData().transferTo(stream); + var data = stream.toString(StandardCharsets.UTF_8); + var map = new Gson().fromJson(data, Map.class); + assertThat(map.get("encryptionInformation")).isNotNull(); + } + } + } + } @Test - public void testZipReader() throws Exception { - RandomAccessFile raf = new RandomAccessFile("src/test/resources/sample.txt.tdf", "r"); - FileChannel fileChannel = raf.getChannel(); - int bufferSize = 1024; - long fileSize = fileChannel.size(); - long position = fileSize - bufferSize; - if (position < 0) { - position = fileSize; + public void testReadingAFileWrittenUsingCommons() throws IOException { + SeekableInMemoryByteChannel outputChannel = new SeekableInMemoryByteChannel(); + ZipArchiveOutputStream zip = new ZipArchiveOutputStream(outputChannel); + zip.setUseZip64(Zip64Mode.Always); + ZipArchiveEntry entry1 = new ZipArchiveEntry("the first entry"); + entry1.setMethod(0); + zip.putArchiveEntry(entry1); + new ByteArrayInputStream("this is the first entry contents".getBytes(StandardCharsets.UTF_8)).transferTo(zip); + zip.closeArchiveEntry(); + ZipArchiveEntry entry2 = new ZipArchiveEntry("the second entry"); + entry2.setMethod(0); + zip.putArchiveEntry(entry2); + new ByteArrayInputStream("this is the second entry contents".getBytes(StandardCharsets.UTF_8)).transferTo(zip); + zip.closeArchiveEntry(); + zip.close(); + + SeekableInMemoryByteChannel inputChannel = new SeekableInMemoryByteChannel(outputChannel.array()); + + var reader = new ZipReader(inputChannel); + + for (ZipReader.Entry entry: reader.getEntries()) { + try (var data = entry.getData()) { + var bytes = new ByteArrayOutputStream(); + data.transferTo(bytes); + + var stringData = bytes.toString(StandardCharsets.UTF_8); + if (entry.getName().equals("the first entry")) { + assertThat(stringData).isEqualTo("this is the first entry contents"); + } else { + assertThat(entry.getName()).isEqualTo("the second entry"); + assertThat(stringData).isEqualTo("this is the second entry contents"); + } + } } + } - ByteBuffer buffer = ByteBuffer.allocate((int)bufferSize); - fileChannel.position(position); - fileChannel.read(buffer); - buffer.flip(); - - ZipReader zipReader = new ZipReader(); - zipReader.readEndOfCentralDirectory(buffer); - buffer.clear(); - long centralDirectoryOffset = zipReader.getCDOffset(); - int numEntries = zipReader.getNumEntries(); - for (int i = 0; i < numEntries; i++) { - fileChannel.position(centralDirectoryOffset); - fileChannel.read(buffer); - buffer.flip(); - long offset = zipReader.readCentralDirectoryFileHeader(buffer); - buffer.clear(); - fileChannel.position(offset); - fileChannel.read(buffer); - buffer.flip(); - zipReader.readLocalFileHeader(buffer); - centralDirectoryOffset += 46 + zipReader.getFileNameLength() + zipReader.getExtraFieldLength(); - buffer.clear(); + @Test + public void testReadingAndWritingRandomFiles() throws IOException { + Random r = new Random(); + int numEntries = r.nextInt(500) + 10; + var testData = IntStream.range(0, numEntries) + .mapToObj(ignored -> { + int fileNameLength = r.nextInt(1000); + String name = IntStream.range(0, fileNameLength) + .mapToObj(idx -> { + var chars = "abcdefghijklmnopqrstuvwxyz ≈ç´ƒ∆∂߃åˆß∂øƒ¨åß∂∆˚¬…∆˚¬ˆøπ¨πøƒ∂åß˚¬…∆¬…ˆøåπƒ∆"; + var randIdx = r.nextInt(chars.length()); + return chars.substring(randIdx, randIdx + 1); + }) + .collect(Collectors.joining()); + int fileSize = r.nextInt(3000); + byte[] fileContent = new byte[fileSize]; + r.nextBytes(fileContent); + + return new Object[] {name, fileContent}; + }).collect(Collectors.toList()); + + ZipWriter writer = new ZipWriter(); + HashMap namesToData = new HashMap<>(); + for (var data: testData) { + var fileName = (String)data[0]; + var content = (byte[])data[1]; + + if (namesToData.containsKey(fileName)) { + continue; + } + + namesToData.put(fileName, content); + + if (r.nextBoolean()) { + writer = writer.file(fileName, content); + } else { + writer = writer.file(fileName, new ByteArrayInputStream(content)); + } } - assertEquals(2, zipReader.getNumEntries()); - assertNotNull(zipReader.getFileNameLength()); - assertNotNull(zipReader.getCDOffset()); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + writer.build(out); + + var channel = new SeekableInMemoryByteChannel(out.toByteArray()); - raf.close(); + ZipReader reader = new ZipReader(channel); + + for (var entry: reader.getEntries()) { + assertThat(namesToData).containsKey(entry.getName()); + var zipData = new ByteArrayOutputStream(); + entry.getData().transferTo(zipData); + assertThat(zipData.toByteArray()).isEqualTo(namesToData.get(entry.getName())); + } } } \ No newline at end of file diff --git a/sdk/src/test/java/io/opentdf/platform/sdk/ZipWriterTest.java b/sdk/src/test/java/io/opentdf/platform/sdk/ZipWriterTest.java index 62010e6b..53cf0fdd 100644 --- a/sdk/src/test/java/io/opentdf/platform/sdk/ZipWriterTest.java +++ b/sdk/src/test/java/io/opentdf/platform/sdk/ZipWriterTest.java @@ -1,46 +1,157 @@ package io.opentdf.platform.sdk; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; +import org.jetbrains.annotations.NotNull; +import org.junit.Ignore; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; +import java.nio.channels.FileChannel; +import java.nio.channels.SeekableByteChannel; import java.nio.charset.StandardCharsets; +import java.nio.file.StandardOpenOption; +import java.util.Random; +import java.util.zip.CRC32; + +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; public class ZipWriterTest { @Test public void writesMultipleFilesToArchive() throws IOException { - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - ZipWriter archiveWriter = new ZipWriter(outputStream); - - String filename1 = "file1.txt"; - String content1 = "Hello, world!"; - archiveWriter.addHeader(filename1, content1.getBytes(StandardCharsets.UTF_8).length); - archiveWriter.addData(content1.getBytes(StandardCharsets.UTF_8)); - archiveWriter.finish(); - - String filename2 = "file2.txt"; - String content2 = "This is another file."; - archiveWriter.addHeader(filename2, content2.getBytes(StandardCharsets.UTF_8).length); - archiveWriter.addData(content2.getBytes(StandardCharsets.UTF_8)); - archiveWriter.finish(); - - byte[] zipData = outputStream.toByteArray(); - assertTrue(zipData.length > 0); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + new ZipWriter() + .file("file1∞®ƒ両†.txt", "Hello world!".getBytes(StandardCharsets.UTF_8)) + .file("file2.txt", "Here are some more things to look at".getBytes(StandardCharsets.UTF_8)) + .file("the streaming one", new ByteArrayInputStream("this is a long long stream".getBytes(StandardCharsets.UTF_8))) + .build(out); + + + SeekableByteChannel chan = new SeekableInMemoryByteChannel(out.toByteArray()); + ZipFile z = new ZipFile.Builder().setSeekableByteChannel(chan).get(); + var entry1 = z.getEntry("file1∞®ƒ両†.txt"); + assertThat(entry1).isNotNull(); + var entry1Data = getDataStream(z, entry1); + assertThat(entry1Data.toString(StandardCharsets.UTF_8)).isEqualTo("Hello world!"); + + var entry2 = z.getEntry("file2.txt"); + assertThat(entry1).isNotNull(); + assertThat(getDataStream(z, entry2).toString(StandardCharsets.UTF_8)).isEqualTo("Here are some more things to look at"); + + var entry3 = z.getEntry("the streaming one"); + assertThat(entry3).isNotNull(); + assertThat(getDataStream(z, entry3).toString(StandardCharsets.UTF_8)).isEqualTo("this is a long long stream"); + } + @Test + public void createsNonZip64Archive() throws IOException { + // when we create things using only byte arrays we create an archive that is non zip64 + ByteArrayOutputStream out = new ByteArrayOutputStream(); + new ZipWriter() + .file("file1∞®ƒ両†.txt", "Hello world!".getBytes(StandardCharsets.UTF_8)) + .file("file2.txt", "Here are some more things to look at".getBytes(StandardCharsets.UTF_8)) + .build(out); + + SeekableByteChannel chan = new SeekableInMemoryByteChannel(out.toByteArray()); + ZipFile z = new ZipFile.Builder().setSeekableByteChannel(chan).get(); + var entry1 = z.getEntry("file1∞®ƒ両†.txt"); + assertThat(entry1).isNotNull(); + var entry1Data = getDataStream(z, entry1); + assertThat(entry1Data.toString(StandardCharsets.UTF_8)).isEqualTo("Hello world!"); + + var entry2 = z.getEntry("file2.txt"); + assertThat(entry1).isNotNull(); + assertThat(getDataStream(z, entry2).toString(StandardCharsets.UTF_8)).isEqualTo("Here are some more things to look at"); } @Test - public void throwsExceptionForEmptyFilename() { - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - ZipWriter archiveWriter = new ZipWriter(outputStream); + @Disabled("this takes a long time and shouldn't run on build machines") + public void testWritingLargeFile() throws IOException { + var random = new Random(); + long fileSize = 4096 + random.nextInt(4096); + var testFile = File.createTempFile("big-file", ""); + testFile.deleteOnExit(); + try (var out = new FileOutputStream(testFile)) { + var buf = new byte[2048]; + for (long i = 0; i < fileSize * 1024 * 1024; i += buf.length) { + random.nextBytes(buf); + out.write(buf); + } + } - String filename = ""; - String content = "Hello, world!"; + var zipFile = File.createTempFile("zip-file", "zip"); + zipFile.deleteOnExit(); + try (var in = new FileInputStream(testFile)) { + try (var out = new FileOutputStream(zipFile)) { + new ZipWriter().file("a big one", in).build(out); + } + } + + var unzippedData = File.createTempFile("big-file-unzipped", ""); + unzippedData.deleteOnExit(); + try (var unzippedStream = new FileOutputStream(unzippedData)) { + try (var chan = FileChannel.open(zipFile.toPath(), StandardOpenOption.READ)) { + ZipFile z = new ZipFile.Builder().setSeekableByteChannel(chan).get(); + var entry = z.getEntry("a big one"); + z.getInputStream(entry).transferTo(unzippedStream); + } + } + + assertThat(unzippedData.length()) + .withFailMessage("extracted file was of the wrong length") + .isEqualTo(testFile.length()); + + + var buf = new byte[2048]; + var unzippedCRC = new CRC32(); + try (var inputStream = new FileInputStream(unzippedData)) { + var read = inputStream.read(buf); + unzippedCRC.update(buf, 0, read); + } + unzippedData.delete(); + + var testFileCRC = new CRC32(); + try (var inputStream = new FileInputStream(testFile)) { + var read = inputStream.read(buf); + testFileCRC.update(buf, 0, read); + } + testFile.delete(); + + assertThat(unzippedCRC.getValue()) + .withFailMessage("the extracted file's CRC differs from the CRC of the test data") + .isEqualTo(testFileCRC.getValue()); + + var ourUnzippedData = File.createTempFile("big-file-we-unzipped", ""); + ourUnzippedData.deleteOnExit(); + try (var unzippedStream = new FileOutputStream(ourUnzippedData)) { + try (var chan = FileChannel.open(zipFile.toPath(), StandardOpenOption.READ)) { + ZipReader reader = new ZipReader(chan); + assertThat(reader.getEntries().size()).isEqualTo(1); + reader.getEntries().get(0).getData().transferTo(unzippedStream); + } + } + + var ourTestFileCRC = new CRC32(); + try (var inputStream = new FileInputStream(ourUnzippedData)) { + var read = inputStream.read(buf); + ourTestFileCRC.update(buf, 0, read); + } + + assertThat(ourTestFileCRC.getValue()) + .withFailMessage("the file we extracted differs from the CRC of the test data") + .isEqualTo(testFileCRC.getValue()); + } - assertThrows(IllegalArgumentException.class, () -> { - archiveWriter.addHeader(filename, content.getBytes(StandardCharsets.UTF_8).length); - }); + @NotNull + private static ByteArrayOutputStream getDataStream(ZipFile z, ZipArchiveEntry entry) throws IOException { + var entry1Data = new ByteArrayOutputStream(); + z.getInputStream(entry).transferTo(entry1Data); + return entry1Data; } } \ No newline at end of file