Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat] generate turboOCI index from a gzip stream buffer #264

Merged
merged 8 commits into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ set(ENABLE_MIMIC_VDSO off)
option(BUILD_CURL_FROM_SOURCE "Compile static libcurl" off)
find_package(photon REQUIRED)
find_package(tcmu REQUIRED)
# Look for an already-available yamlcpp package first; only when that lookup
# does not set yamlcpp_FOUND is the 0.8.0 tag fetched from the upstream
# yaml-cpp Git repository and made available to this build.
# NOTE(review): FetchContent_Declare/FetchContent_MakeAvailable require
# include(FetchContent) earlier in this file — not visible in this hunk, confirm.
find_package(yamlcpp)
if (NOT yamlcpp_FOUND)
FetchContent_Declare(
yamlcpp
GIT_REPOSITORY https://github.com/jbeder/yaml-cpp.git
GIT_TAG 0.8.0
)
FetchContent_MakeAvailable(yamlcpp)
endif()


if(BUILD_TESTING)
enable_testing()
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ add_library(overlaybd_image_lib
switch_file.cpp
bk_download.cpp
prefetch.cpp
tools/sha256file.cpp
)
target_include_directories(overlaybd_image_lib PUBLIC
${CURL_INCLUDE_DIRS}
Expand Down
38 changes: 1 addition & 37 deletions src/bk_download.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,50 +31,14 @@
#include <unistd.h>
#include "switch_file.h"
#include "image_file.h"
#include "tools/sha256file.h"

using namespace photon::fs;

static constexpr size_t ALIGNMENT = 4096;

namespace BKDL {

std::string sha256sum(const char *fn) {
constexpr size_t BUFFERSIZE = 65536;
int fd = open(fn, O_RDONLY | O_DIRECT);
if (fd < 0) {
LOG_ERROR("failed to open `", fn);
return "";
}
DEFER(close(fd););

struct stat stat;
if (::fstat(fd, &stat) < 0) {
LOG_ERROR("failed to stat `", fn);
return "";
}
SHA256_CTX ctx = {0};
SHA256_Init(&ctx);
__attribute__((aligned(ALIGNMENT))) char buffer[65536];
unsigned char sha[32];
ssize_t recv = 0;
for (off_t offset = 0; offset < stat.st_size; offset += BUFFERSIZE) {
recv = pread(fd, &buffer, BUFFERSIZE, offset);
if (recv < 0) {
LOG_ERROR("io error: `", fn);
return "";
}
if (SHA256_Update(&ctx, buffer, recv) < 0) {
LOG_ERROR("sha256 calculate error: `", fn);
return "";
}
}
SHA256_Final(sha, &ctx);
char res[SHA256_DIGEST_LENGTH * 2];
for (int i = 0; i < SHA256_DIGEST_LENGTH; i++)
sprintf(res + (i * 2), "%02x", sha[i]);
return "sha256:" + std::string(res, SHA256_DIGEST_LENGTH * 2);
}

bool check_downloaded(const std::string &dir) {
std::string fn = dir + "/" + COMMIT_FILE_NAME;
auto lfs = photon::fs::new_localfs_adaptor();
Expand Down
13 changes: 13 additions & 0 deletions src/example_config/stream-conv.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
globalConfig:
workDir: /tmp/stream_conv
udsAddr: /var/run/stream_conv.sock
# httpAddr: 127.0.0.1
httpPort: 9101
reusePort: true

logConfig:
level: 1
mode: stdout
rotateNum: 3
limitSizeMB: 10
path: /var/log/overlaybd/stream_convertor.log
1 change: 1 addition & 0 deletions src/overlaybd/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ add_subdirectory(tar)
add_subdirectory(extfs)
add_subdirectory(gzip)
add_subdirectory(gzindex)
add_subdirectory(stream_convertor)
BigVan marked this conversation as resolved.
Show resolved Hide resolved

add_library(overlaybd_lib INTERFACE)
target_include_directories(overlaybd_lib INTERFACE
Expand Down
2 changes: 0 additions & 2 deletions src/overlaybd/gzindex/gzfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include <string.h>
#include <zlib.h>
#include <list>
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <algorithm>
#include "gzfile_index.h"
Expand Down Expand Up @@ -356,7 +355,6 @@ ssize_t GzFile::pread(void *buf, size_t count, off_t offset) {
if (p == nullptr) {
LOG_ERRNO_RETURN(0, -1, "Failed to seek_index(,`)", offset);
}
//LOG_DEBUG("offset:`, index->de_pos:", offset, p->de_pos+0);

return extract(p, offset, (unsigned char*)buf, count);
}
Expand Down
6 changes: 5 additions & 1 deletion src/overlaybd/gzindex/gzfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

#pragma once
#include "photon/fs/filesystem.h"
#include "gzfile_index.h"


extern photon::fs::IFile* new_gzfile(photon::fs::IFile* gzip_file, photon::fs::IFile* index, bool ownership = false);

//chunksize:
Expand All @@ -32,6 +35,7 @@ extern photon::fs::IFile* new_gzfile(photon::fs::IFile* gzip_file, photon::fs::I
//0: no compression
//1: best speed
//9: best compression
extern int create_gz_index(photon::fs::IFile* gzip_file, const char *index_file_path, off_t chunk_size=1048576, int dict_compress_algo=1, int dict_compress_level=6);
extern int create_gz_index(photon::fs::IFile* gzip_file, const char *index_file_path,
off_t chunk_size=GZ_CHUNK_SIZE, int dict_compress_algo=GZ_DICT_COMPERSS_ALGO, int dict_compress_level=GZ_COMPRESS_LEVEL);

bool is_gzfile(photon::fs::IFile* file);
18 changes: 18 additions & 0 deletions src/overlaybd/gzindex/gzfile_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,16 @@
#include <string>
#include <iostream>
#include <sstream>
#include <sys/types.h>
#include <vector>
#include <zlib.h>
#include "photon/common/checksum/crc32c.h"
#include "photon/fs/filesystem.h"

#define GZ_CHUNK_SIZE 1048576
#define GZ_DICT_COMPERSS_ALGO 1
#define GZ_COMPRESS_LEVEL 6

#define WINSIZE 32768U
#define DEFLATE_BLOCK_UNCOMPRESS_MAX_SIZE 65536U
#define GZFILE_INDEX_MAGIC "ddgzidx"
Expand Down Expand Up @@ -76,3 +84,13 @@ struct IndexEntry {


typedef std::vector<struct IndexEntry *> INDEX;

// Forward declaration only; this header exposes the recorder as an opaque
// pointer that is created and destroyed through the two functions below.
class IndexFilterRecorder;
// Heap-allocates an IndexFilterRecorder constructed from (h, index, save_as)
// and returns it. Release it with delete_index_filter().
IndexFilterRecorder *new_index_filter(IndexFileHeader *h, INDEX *index, photon::fs::IFile *save_as);
// Deletes the recorder and resets the caller's pointer to nullptr.
void delete_index_filter(IndexFilterRecorder *&);

// Zeroes `h` and fills it in: magic string, version numbers, the given dict
// compression algorithm/level, `span`, WINSIZE as the window, and the gzip
// file size taken from src->fstat().
// Returns 0 on success, -1 if src->fstat() fails.
int init_index_header(photon::fs::IFile* src, IndexFileHeader &h, off_t span, int dict_compress_algo, int dict_compress_level);

// Calls filter->record() for the current inflate state when strm.data_type has
// EACH_DEFLATE_BLOCK_BIT set and LAST_DEFLATE_BLOCK_BIT clear; otherwise does
// nothing. Returns 0 on success (including the nothing-to-record case) and -1
// if record() reports an error.
int create_index_entry(z_stream strm, IndexFilterRecorder *filter, off_t en_pos, off_t de_pos, unsigned char *window);

// Writes the header and index to `index_file`. When gzip_file_size is not -1
// it overwrites h.gzip_file_size before saving. Returns 0 on success, -1 on
// failure.
int save_index_to_file(IndexFileHeader &h, INDEX& index, photon::fs::IFile *index_file, ssize_t gzip_file_size = -1);
95 changes: 65 additions & 30 deletions src/overlaybd/gzindex/gzip_index_create.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@
See the License for the specific language governing permissions and
limitations under the License.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include <sys/fcntl.h>

#include "gzfile_index.h"

#include "photon/common/alog.h"
#include "photon/common/alog-stdstring.h"
#include "photon/fs/localfs.h"
Expand Down Expand Up @@ -179,9 +180,30 @@ static int dict_compress(const IndexFileHeader& h,
return -1;
}

static int build_index(IndexFileHeader& h,photon::fs::IFile *gzfile, INDEX &index, photon::fs::IFile* index_file) {
IndexFilterRecorder filter(&h, &index, index_file);
// Offers the current inflate position to `filter` as a candidate index point.
//
// A record is attempted only when strm.data_type has EACH_DEFLATE_BLOCK_BIT set
// and LAST_DEFLATE_BLOCK_BIT clear. The low three bits of data_type, the two
// stream positions, the remaining output space and the window are passed on to
// filter->record().
//
// Returns 0 when nothing had to be recorded or the record succeeded, and -1
// when filter->record() returned non-zero.
int create_index_entry(z_stream strm, IndexFilterRecorder *filter, off_t en_pos, off_t de_pos, unsigned char *window){
    LOG_DEBUG("`",VALUE(strm.data_type));

    const bool block_flag_set = (strm.data_type & EACH_DEFLATE_BLOCK_BIT) != 0;
    const bool last_flag_set = (strm.data_type & LAST_DEFLATE_BLOCK_BIT) != 0;
    if (!block_flag_set || last_flag_set) {
        // Not a recordable position: nothing to do.
        return 0;
    }

    if (filter->record(strm.data_type & 7, en_pos, de_pos, strm.avail_out, window) != 0) {
        return -1;
    }
    return 0;
}

// Factory for IndexFilterRecorder: forwards the header, the index container
// and the output file to the constructor and hands back the heap-allocated
// object. The caller owns the result and is responsible for deleting it.
IndexFilterRecorder* new_index_filter(IndexFileHeader *h, INDEX *index, photon::fs::IFile *save_as)
{
    auto *recorder = new IndexFilterRecorder(h, index, save_as);
    return recorder;
}

// Destroys the recorder referenced by `idx_filter` and leaves the caller's
// pointer set to nullptr. Passing a null pointer is harmless because deleting
// a null pointer has no effect.
void delete_index_filter(IndexFilterRecorder *&idx_filter) {
    IndexFilterRecorder *doomed = idx_filter;
    idx_filter = nullptr;
    delete doomed;
}

static int build_index(IndexFileHeader& h,photon::fs::IFile *gzfile, INDEX &index, photon::fs::IFile* index_file) {
// IndexFilterRecorder filter(&h, &index, index_file);
auto filter = new IndexFilterRecorder(&h, &index, index_file);
DEFER(delete filter);
int32_t inbuf_size = WINSIZE;
unsigned char *inbuf = new unsigned char[inbuf_size];
DEFER(delete []inbuf);
Expand Down Expand Up @@ -216,7 +238,6 @@ static int build_index(IndexFileHeader& h,photon::fs::IFile *gzfile, INDEX &inde
strm.avail_out = WINSIZE;
strm.next_out = window;
}

ttin += strm.avail_in;
ttout += strm.avail_out;
ret = inflate(&strm, Z_BLOCK);
Expand All @@ -231,12 +252,10 @@ static int build_index(IndexFileHeader& h,photon::fs::IFile *gzfile, INDEX &inde
LOG_ERRNO_RETURN(0, -1, "Fail to inflate. ret:`", ret);
}
//TODO Here generate crc32 for uncompressed data block

if ((strm.data_type & EACH_DEFLATE_BLOCK_BIT) && !(strm.data_type & LAST_DEFLATE_BLOCK_BIT)) {
if (filter.record(strm.data_type & 7, ttin, ttout, strm.avail_out, window) != 0) {
LOG_ERRNO_RETURN(ret, -1, "Failed to add_index_entry");
}
if (create_index_entry(strm, filter, ttin, ttout, window) != 0){
LOG_ERRNO_RETURN(ret, -1, "Failed to add_index_entry");
}

} while (strm.avail_in != 0);
} while (ret != Z_STREAM_END);
return 0;
Expand All @@ -258,15 +277,20 @@ static int get_compressed_index(const IndexFileHeader& h, const INDEX& index, un
out_len = index_len;
return 0;
}

LOG_INFO("index crc: `", crc32(0, buf, index_len));
return zlib_compress(h.dict_compress_level, buf, index_len, out, out_len);
}

static int save_index_to_file(IndexFileHeader &h, INDEX& index, photon::fs::IFile *index_file) {
int save_index_to_file(IndexFileHeader &h, INDEX& index, photon::fs::IFile *index_file, ssize_t gzip_file_size) {
int indx_cmpr_buf_len = index.size() * sizeof(IndexEntry) * 2 + 4096;
unsigned char *buf = new unsigned char[indx_cmpr_buf_len];
DEFER(delete []buf);

if (gzip_file_size != -1) {
LOG_INFO("save gzip file size: `", gzip_file_size);
h.gzip_file_size = gzip_file_size;
}

if (get_compressed_index(h, index, buf, indx_cmpr_buf_len) != 0) {
LOG_ERROR_RETURN(0, -1, "Failed to get_compress_index");
}
Expand All @@ -293,6 +317,28 @@ static int save_index_to_file(IndexFileHeader &h, INDEX& index, photon::fs::IFil
return 0;
}

// Populates the index file header `h` for the gzip file `src`.
//
// `h` is zeroed first and then every field is set: the "ddgzidx" magic,
// major_version 1 / minor_version 0, the requested dict compression algorithm
// and level, a zero flag, the per-entry size, `span`, WINSIZE as the window,
// the gzip file size reported by src->fstat(), and index_start pointing just
// past the header.
//
// Returns 0 on success, or -1 (through LOG_ERRNO_RETURN) when fstat fails; in
// that case `h` is left untouched.
int init_index_header(photon::fs::IFile* src, IndexFileHeader &h, off_t span, int dict_compress_algo, int dict_compress_level) {
    struct stat src_stat;
    const int stat_rc = src->fstat(&src_stat);
    if (stat_rc != 0) {
        LOG_ERRNO_RETURN(0, -1, "Faild to gzip_file->fstat()");
    }

    // Start from an all-zero header so no field is left uninitialised.
    memset(&h, 0, sizeof(h));

    // Identification.
    strncpy(h.magic, "ddgzidx", sizeof(h.magic));
    h.major_version = 1;
    h.minor_version = 0;
    h.flag = 0;

    // Compression settings supplied by the caller.
    h.dict_compress_algo = dict_compress_algo;
    h.dict_compress_level = dict_compress_level;

    // Geometry of the index and of the source stream.
    h.index_size = sizeof(struct IndexEntry);
    h.span = span;
    h.window = WINSIZE;
    h.gzip_file_size = src_stat.st_size;

    memset(h.reserve, 0, sizeof(h.reserve));
    h.index_start = sizeof(h);
    return 0;
}

//int create_gz_index(photon::fs::IFile* gzip_file, const char *index_file_path, off_t span, unsigned char dict_compress_algo) {
//int create_gz_index(photon::fs::IFile* gzip_file, off_t span, const char *index_file_path) {
int create_gz_index(photon::fs::IFile* gzip_file, const char *index_file_path, off_t span, int dict_compress_algo, int dict_compress_level) {
Expand All @@ -310,33 +356,22 @@ int create_gz_index(photon::fs::IFile* gzip_file, const char *index_file_path, o
LOG_ERRNO_RETURN(0, -1, "Span is too small, must be greater than 100, span:`", span);
}

struct stat sbuf;
if (gzip_file->fstat(&sbuf) != 0) {
LOG_ERRNO_RETURN(0, -1, "Faild to gzip_file->fstat()");
}

photon::fs::IFile *index_file = photon::fs::open_localfile_adaptor(index_file_path, O_RDWR | O_CREAT | O_TRUNC, 0644);
if (index_file == nullptr) {
LOG_ERROR_RETURN(0, -1, "Failed to open(`)", index_file_path);
}
DEFER(index_file->close());

IndexFileHeader h;
memset(&h, 0, sizeof(h));
strncpy(h.magic, "ddgzidx", sizeof(h.magic));
h.major_version =1;
h.minor_version =0;
h.dict_compress_algo = dict_compress_algo;
h.dict_compress_level = dict_compress_level;
h.flag=0;
h.index_size = sizeof(struct IndexEntry);
h.span = span;
h.window= WINSIZE;
h.gzip_file_size= sbuf.st_size;
memset(h.reserve, 0, sizeof(h.reserve));
h.index_start = sizeof(h);

if (init_index_header(gzip_file, h, span, dict_compress_algo, dict_compress_level) != 0) {
LOG_ERRNO_RETURN(0, -1, "init index header failed.");
}
INDEX index;
DEFER({
for (auto it : index) {
delete it;
}
});
int ret = build_index(h, gzip_file, index, index_file);
if (ret != 0) {
LOG_ERRNO_RETURN(0, -1, "Faild to build_index");
Expand Down
3 changes: 2 additions & 1 deletion src/overlaybd/gzindex/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ link_directories($ENV{GTEST}/lib)

add_executable(gzindex_test test.cpp)
target_include_directories(gzindex_test PUBLIC ${PHOTON_INCLUDE_DIR})
target_link_libraries(gzindex_test gtest gtest_main gflags pthread photon_static gzindex_lib cache_lib)
target_link_libraries(gzindex_test gtest gtest_main gflags pthread photon_static
gzindex_lib gzip_lib cache_lib checksum_lib)

add_test(
NAME gzindex_test
Expand Down
Loading