-
-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Decode
.tar.xz
archives using liblzma (via WebAssembly) (#173)
feat: support `.tar.xz` archives using liblzma (via WebAssembly)
- Loading branch information
Showing
11 changed files
with
475 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
/bazel-* | ||
/MODULE.bazel.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
load("@aspect_bazel_lib//lib:write_source_files.bzl", "write_source_file") | ||
load(":wasm.bzl", "wasm_binary") | ||
|
||
cc_binary( | ||
name = "xzdec", | ||
srcs = ["xzdec.c"], | ||
linkopts = [ | ||
"-nostdlib", | ||
"-lc", | ||
"-Wl,--no-entry", | ||
], | ||
tags = ["manual"], | ||
deps = ["@xz//:lzma"], | ||
) | ||
|
||
wasm_binary( | ||
name = "xzdec_wasm", | ||
out = "xzdec.wasm", | ||
lib = ":xzdec", | ||
) | ||
|
||
genrule(
    name = "xzdec_wasm_gz",
    srcs = [":xzdec_wasm"],
    outs = ["xzdec_wasm_gz/xzdec.wasm.gz"],
    # Feed the module to gzip on stdin instead of through `cat`; `-k` has no
    # effect when reading stdin, so it is dropped. `-n` omits the file name
    # and timestamp from the gzip header.
    cmd = "gzip -9 -n < $< > $@",
)
|
||
write_source_file( | ||
name = "write_xzdec_wasm_gz_to_source_tree", | ||
in_file = ":xzdec_wasm_gz", | ||
out_file = "xzdec.wasm.gz", | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
module(name = "publish-to-bcr") | ||
|
||
bazel_dep(name = "aspect_bazel_lib", version = "2.9.3") | ||
bazel_dep(name = "platforms", version = "0.0.10") | ||
bazel_dep(name = "toolchains_llvm", version = "1.2.0") | ||
bazel_dep(name = "xz", version = "5.4.5.bcr.5") | ||
|
||
# https://github.com/bazel-contrib/toolchains_llvm/pull/405 | ||
# | ||
# FIXME: Remove when a new `toolchains_llvm` has been released. | ||
git_override( | ||
module_name = "toolchains_llvm", | ||
commit = "bda1c9fbf232b682c30d039f8e4a5e3cf3025d0f", | ||
remote = "https://github.com/bazel-contrib/toolchains_llvm", | ||
) | ||
|
||
llvm = use_extension("@toolchains_llvm//toolchain/extensions:llvm.bzl", "llvm") | ||
llvm.toolchain( | ||
libclang_rt = { | ||
"@libclang_rt-wasm32-wasi//:libclang_rt.builtins-wasm32.a": "wasm32-unknown-unknown/libclang_rt.builtins.a", | ||
}, | ||
llvm_versions = { | ||
# Pin to an older LLVM version due to a stray Homebrew dependency | ||
# in the macOS build of v19.1.0. | ||
# | ||
# https://github.com/llvm/llvm-project/issues/110070 | ||
"": "18.1.8", | ||
}, | ||
stdlib = {"wasm32": "libc"}, | ||
) | ||
llvm.sysroot( | ||
label = "@wasi-sysroot//sysroots/wasm32-wasip2", | ||
targets = ["wasm32"], | ||
) | ||
use_repo(llvm, "llvm_toolchain") | ||
|
||
register_toolchains("@llvm_toolchain//:all") | ||
|
||
wasi_sysroot = use_repo_rule("//:wasm.bzl", "wasi_sysroot") | ||
|
||
wasm32_libclang_rt = use_repo_rule("//:wasm.bzl", "wasm32_libclang_rt") | ||
|
||
wasi_sysroot(name = "wasi-sysroot") | ||
|
||
wasm32_libclang_rt(name = "libclang_rt-wasm32-wasi") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# xz decompressor | ||
|
||
This directory contains a WebAssembly module for decompressing an Xz file using | ||
[liblzma], along with a JavaScript wrapper that adapts `xzdec.wasm` for use | ||
with the Node.js [`node:stream`] library. | ||
|
||
[liblzma]: https://github.com/tukaani-project/xz/tree/v5.4.5/src/liblzma | ||
[`node:stream`]: https://nodejs.org/docs/latest-v18.x/api/stream.html | ||
|
||
Files: | ||
- `xzdec.c` is a thin wrapper around liblzma that exports functions with a | ||
WebAssembly-style ABI. It compiles to `xzdec.wasm`. | ||
- `xzdec.wasm.gz` is a gzip-compressed `xzdec.wasm`, to reduce the size impact | ||
of checking generated build artifacts into Git. | ||
- `xzdec.ts` exports the `decompress(r: stream.Readable, w: stream.Writable)` | ||
function, which instantiates a WebAssembly module from `xzdec.wasm.gz` and | ||
decompresses an Xz bitstream. | ||
|
||
When building a new version of `xzdec.wasm.gz`, or verifying that the checked-in | ||
artifact matches the expected output, Bazel should be run with `-c opt` so that | ||
the compiled output is optimized. | ||
|
||
``` | ||
$ cd src/infrastructure/xzdec | ||
$ bazel build -c opt //:xzdec_wasm_gz | ||
$ diff -s xzdec.wasm.gz bazel-bin/xzdec_wasm_gz/xzdec.wasm.gz | ||
Files xzdec.wasm.gz and bazel-bin/xzdec_wasm_gz/xzdec.wasm.gz are identical | ||
$ | ||
``` | ||
|
||
Note that variations in the gzip compression may cause spurious differences
between the checked-in `xzdec.wasm.gz` and a freshly built one -- in this case,
decompressing the two files and comparing `xzdec.wasm` directly may provide
more consistent behavior.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
_WASM_ABIS = [ | ||
"wasm32-wasip2", | ||
] | ||
|
||
def _platform_transition(settings, attr):
    """Sets `--platforms` to the label stored in the rule's `_platform` attribute.

    `settings` is unused: the transition declares no inputs.
    """
    target_platform = str(attr._platform)
    return {"//command_line_option:platforms": target_platform}
|
||
platform_transition = transition( | ||
implementation = _platform_transition, | ||
inputs = [], | ||
outputs = ["//command_line_option:platforms"], | ||
) | ||
|
||
def _wasm_binary(ctx):
    """Exposes the file built for `lib` under the rule's output name.

    The output is the explicit `out` attribute when one was given, otherwise
    `<name>.wasm`. It is created as a symlink to the single file of `lib`.
    """
    wasm_file = ctx.outputs.out or ctx.actions.declare_file(ctx.attr.name + ".wasm")
    ctx.actions.symlink(output = wasm_file, target_file = ctx.file.lib)
    return DefaultInfo(files = depset(direct = [wasm_file]))
|
||
wasm_binary = rule( | ||
implementation = _wasm_binary, | ||
attrs = { | ||
"lib": attr.label( | ||
allow_single_file = True, | ||
cfg = platform_transition, | ||
), | ||
"out": attr.output(), | ||
"_platform": attr.label( | ||
default = Label("@toolchains_llvm//platforms:wasm32"), | ||
), | ||
"_allowlist_function_transition": attr.label( | ||
default = "@bazel_tools//tools/allowlists/function_transition_allowlist", | ||
), | ||
}, | ||
) | ||
|
||
_SYSROOT_BUILD = """ | ||
filegroup( | ||
name = {name}, | ||
srcs = glob(["include/**/*", "lib/**/*", "share/**/*"], allow_empty=True), | ||
visibility = ["//visibility:public"], | ||
) | ||
""" | ||
|
||
def _wasi_sysroot(ctx):
    """Downloads the WASI sysroot and lays out one sub-package per ABI.

    For every ABI in `_WASM_ABIS`, a `sysroots/<abi>` package is created from
    `_SYSROOT_BUILD`, and the ABI's `include`, `lib` and `share` directories
    are moved into it. The results of the `mv` commands are not inspected.
    """
    ctx.download_and_extract(
        url = ["https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-24/wasi-sysroot-24.0.tar.gz"],
        integrity = "sha256-NRcvfSeZSFsVpGsdh/UKWF2RXsZiCA8AXZkVOlCIjwg=",
        stripPrefix = "wasi-sysroot-24.0",
    )

    ctx.file("BUILD.bazel", "")
    ctx.file("sysroots/BUILD.bazel", "")
    for abi in _WASM_ABIS:
        abi_root = "sysroots/%s" % (abi,)

        # Writing the BUILD file first also creates the destination directory.
        ctx.file(abi_root + "/BUILD.bazel", _SYSROOT_BUILD.format(name = repr(abi)))
        for subdir in ["include", "lib", "share"]:
            ctx.execute(["mv", subdir + "/" + abi, abi_root + "/" + subdir])
|
||
wasi_sysroot = repository_rule( | ||
implementation = _wasi_sysroot, | ||
) | ||
|
||
def _wasm32_libclang_rt(ctx):
    """Downloads the wasm32 compiler-rt builtins archive and exports it.

    The generated root package exports `libclang_rt.builtins-wasm32.a` both as
    a file and through a public filegroup.
    """
    build_file_content = """
exports_files(["libclang_rt.builtins-wasm32.a"])
filegroup(
name = "libclang_rt-wasm32-wasi",
srcs = ["libclang_rt.builtins-wasm32.a"],
visibility = ["//visibility:public"],
)
"""
    ctx.file("BUILD.bazel", build_file_content)

    ctx.download_and_extract(
        url = ["https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-24/libclang_rt.builtins-wasm32-wasi-24.0.tar.gz"],
        integrity = "sha256-fjPA33WLkEabHePKFY4tCn9xk01YhFJbpqNy3gs7Dsc=",
        stripPrefix = "libclang_rt.builtins-wasm32-wasi-24.0",
    )
|
||
wasm32_libclang_rt = repository_rule( | ||
implementation = _wasm32_libclang_rt, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
#include <stdint.h> | ||
#include <stdlib.h> | ||
|
||
#include <lzma.h> | ||
|
||
typedef uint32_t xzdec_lzma_ret; | ||
|
||
struct Xzdec { | ||
lzma_stream stream; | ||
}; | ||
|
||
/**
 * Allocates a buffer of `len` bytes with `malloc`.
 *
 * Returns the pointer produced by `malloc`, which is NULL when the
 * allocation fails. Release the buffer with `xzdec_deallocate`.
 */
__attribute__((export_name("xzdec_allocate")))
uint8_t *xzdec_allocate(uint32_t len) {
	return malloc(len);
}
|
||
/**
 * Releases a buffer previously returned by `xzdec_allocate`.
 *
 * The pointer is handed straight to `free`, so passing NULL is a no-op.
 */
__attribute__((export_name("xzdec_deallocate")))
void xzdec_deallocate(uint8_t *ptr) {
	free(ptr);
}
|
||
__attribute__((export_name("xzdec_new_stream_decoder"))) | ||
xzdec_lzma_ret xzdec_new_stream_decoder( | ||
uint32_t memlimit, | ||
uint32_t flags, | ||
struct Xzdec **xzdec_ptr | ||
) { | ||
lzma_stream stream = LZMA_STREAM_INIT; | ||
lzma_ret rc = lzma_stream_decoder(&stream, memlimit, flags); | ||
if (rc != LZMA_OK) { | ||
return rc; | ||
} | ||
*xzdec_ptr = malloc(sizeof(struct Xzdec)); | ||
(*xzdec_ptr)->stream = stream; | ||
return LZMA_OK; | ||
} | ||
|
||
__attribute__((export_name("xzdec_drop"))) | ||
void xzdec_drop(struct Xzdec *xzdec) { | ||
lzma_end(&(xzdec->stream)); | ||
free(xzdec); | ||
} | ||
|
||
__attribute__((export_name("xzdec_input_empty"))) | ||
uint32_t xzdec_input_empty(struct Xzdec *xzdec) { | ||
if (xzdec->stream.avail_in == 0) { | ||
return 1; | ||
} | ||
return 0; | ||
} | ||
|
||
__attribute__((export_name("xzdec_set_input"))) | ||
void xzdec_set_input( | ||
struct Xzdec *xzdec, | ||
const uint8_t *input_buf, | ||
uint32_t input_buf_len | ||
) { | ||
xzdec->stream.next_in = input_buf; | ||
xzdec->stream.avail_in = input_buf_len; | ||
} | ||
|
||
__attribute__((export_name("xzdec_next_output"))) | ||
xzdec_lzma_ret xzdec_next_output( | ||
struct Xzdec *xzdec, | ||
uint8_t *output_buf, | ||
uint32_t output_buf_cap, | ||
uint32_t *output_buf_len | ||
) { | ||
xzdec->stream.next_out = output_buf; | ||
xzdec->stream.avail_out = output_buf_cap; | ||
lzma_ret rc = lzma_code(&(xzdec->stream), LZMA_RUN); | ||
*output_buf_len = output_buf_cap - xzdec->stream.avail_out; | ||
return rc; | ||
} | ||
|
||
__attribute__((export_name("xzdec_finish"))) | ||
xzdec_lzma_ret xzdec_finish( | ||
struct Xzdec *xzdec, | ||
uint8_t *output_buf, | ||
uint32_t output_buf_cap, | ||
uint32_t *output_buf_len | ||
) { | ||
xzdec->stream.next_out = output_buf; | ||
xzdec->stream.avail_out = output_buf_cap; | ||
lzma_ret rc = lzma_code(&(xzdec->stream), LZMA_FINISH); | ||
*output_buf_len = output_buf_cap - xzdec->stream.avail_out; | ||
return rc; | ||
} | ||
|
||
// Calls the toolchain-provided `__wasm_call_ctors()` explicitly.
//
// Referencing it here prevents Clang from wrapping every exported function
// and injecting calls to `__wasm_call_dtors()`.
void _initialize(void) {
	void __wasm_call_ctors(void);
	__wasm_call_ctors();
}
Oops, something went wrong.