diff --git a/src/domain/release-archive.ts b/src/domain/release-archive.ts
index 534b738..72be809 100644
--- a/src/domain/release-archive.ts
+++ b/src/domain/release-archive.ts
@@ -7,6 +7,7 @@ import path from "node:path";
 import { parse as parseUrl } from "node:url";
 import tar from "tar";
 
 import { UserFacingError } from "./error.js";
+import { decompress as decompressXz } from "../infrastructure/xzdec/xzdec.js";
 import { ModuleFile } from "./module-file.js";
 
@@ -61,7 +62,7 @@ export class ReleaseArchive {
   public async extractModuleFile(): Promise<ModuleFile> {
     this.extractDir = path.dirname(this._diskPath);
 
-    if (this._diskPath.endsWith(".tar.gz")) {
+    if (this.isSupportedTarball()) {
       await this.extractReleaseTarball(this.extractDir);
     } else if (this._diskPath.endsWith(".zip")) {
       await this.extractReleaseZip(this.extractDir);
@@ -81,7 +82,37 @@ export class ReleaseArchive {
     return new ModuleFile(extractedModulePath);
   }
 
+  /** Whether the archive on disk is a tarball this class can extract. */
+  private isSupportedTarball(): boolean {
+    return (
+      this._diskPath.endsWith(".tar.gz") || this._diskPath.endsWith(".tar.xz")
+    );
+  }
+
   private async extractReleaseTarball(extractDir: string): Promise<void> {
+    if (this._diskPath.endsWith(".tar.xz")) {
+      // Decode the xz layer with the bundled WebAssembly decoder and feed
+      // the resulting bytes to a streaming unpacker.
+      const reader = fs.createReadStream(this._diskPath);
+      const writer = tar.x({
+        cwd: extractDir,
+      });
+
+      // `decompressXz()` only writes to `writer`; it never ends it. End the
+      // stream ourselves and wait for "close" so that extraction has
+      // finished before callers go looking for the extracted files.
+      const closed = new Promise<void>((resolve, reject) => {
+        writer.once("close", () => resolve());
+        writer.once("error", reject);
+      });
+      const fed = (async () => {
+        await decompressXz(reader, writer);
+        writer.end();
+      })();
+      await Promise.all([fed, closed]);
+      return;
+    }
+
     await tar.x({
       cwd: extractDir,
       file: this._diskPath,
diff --git a/src/infrastructure/xzdec/.gitignore b/src/infrastructure/xzdec/.gitignore
new file mode 100644
index 0000000..2f0f755
--- /dev/null
+++ b/src/infrastructure/xzdec/.gitignore
@@ -0,0 +1,2 @@
+/bazel-*
+/MODULE.bazel.lock
diff --git a/src/infrastructure/xzdec/BUILD.bazel b/src/infrastructure/xzdec/BUILD.bazel
new file mode 100644
index 0000000..2ae7e30
--- /dev/null
+++ b/src/infrastructure/xzdec/BUILD.bazel
@@ -0,0 +1,33 @@
+load("@aspect_bazel_lib//lib:write_source_files.bzl", "write_source_file")
+load(":wasm.bzl", "wasm_binary")
+
+cc_binary(
+    name = "xzdec",
+    srcs = ["xzdec.c"],
+    linkopts = [
+        "-nostdlib",
+        "-lc",
+        "-Wl,--no-entry",
+    ],
+    tags = ["manual"],
+    deps = ["@xz//:lzma"],
+)
+
+wasm_binary(
+    name = "xzdec_wasm",
+    out = "xzdec.wasm",
+    lib = ":xzdec",
+)
+
+genrule(
+    name = "xzdec_wasm_gz",
+    srcs = [":xzdec_wasm"],
+    outs = ["xzdec_wasm_gz/xzdec.wasm.gz"],
+    cmd = "cat $< | gzip -9 -k -n > $@",
+)
+
+write_source_file(
+    name = "write_xzdec_wasm_gz_to_source_tree",
+    in_file = ":xzdec_wasm_gz",
+    out_file = "xzdec.wasm.gz",
+)
diff --git a/src/infrastructure/xzdec/MODULE.bazel b/src/infrastructure/xzdec/MODULE.bazel
new file mode 100644
index 0000000..9aab199
--- /dev/null
+++ b/src/infrastructure/xzdec/MODULE.bazel
@@ -0,0 +1,45 @@
+module(name = "publish-to-bcr")
+
+bazel_dep(name = "aspect_bazel_lib", version = "2.9.3")
+bazel_dep(name = "platforms", version = "0.0.10")
+bazel_dep(name = "toolchains_llvm", version = "1.2.0")
+bazel_dep(name = "xz", version = "5.4.5.bcr.5")
+
+# https://github.com/bazel-contrib/toolchains_llvm/pull/405
+#
+# FIXME: Remove when a new `toolchains_llvm` has been released.
+git_override(
+    module_name = "toolchains_llvm",
+    commit = "bda1c9fbf232b682c30d039f8e4a5e3cf3025d0f",
+    remote = "https://github.com/bazel-contrib/toolchains_llvm",
+)
+
+llvm = use_extension("@toolchains_llvm//toolchain/extensions:llvm.bzl", "llvm")
+llvm.toolchain(
+    libclang_rt = {
+        "@libclang_rt-wasm32-wasi//:libclang_rt.builtins-wasm32.a": "wasm32-unknown-unknown/libclang_rt.builtins.a",
+    },
+    llvm_versions = {
+        # Pin to an older LLVM version due to a stray Homebrew dependency
+        # in the macOS build of v19.1.0.
+        #
+        # https://github.com/llvm/llvm-project/issues/110070
+        "": "18.1.8",
+    },
+    stdlib = {"wasm32": "libc"},
+)
+llvm.sysroot(
+    label = "@wasi-sysroot//sysroots/wasm32-wasip2",
+    targets = ["wasm32"],
+)
+use_repo(llvm, "llvm_toolchain")
+
+register_toolchains("@llvm_toolchain//:all")
+
+wasi_sysroot = use_repo_rule("//:wasm.bzl", "wasi_sysroot")
+
+wasm32_libclang_rt = use_repo_rule("//:wasm.bzl", "wasm32_libclang_rt")
+
+wasi_sysroot(name = "wasi-sysroot")
+
+wasm32_libclang_rt(name = "libclang_rt-wasm32-wasi")
diff --git a/src/infrastructure/xzdec/README.md b/src/infrastructure/xzdec/README.md
new file mode 100644
index 0000000..fc70bed
--- /dev/null
+++ b/src/infrastructure/xzdec/README.md
@@ -0,0 +1,33 @@
+# xz decompressor
+
+This directory contains a WebAssembly module for decompressing an Xz file using
+[liblzma], along with a JavaScript wrapper that adapts `xzdec.wasm` for use
+with the Node.js [`node:stream`] library.
+
+[liblzma]: https://github.com/tukaani-project/xz/tree/v5.4.5/src/liblzma
+[`node:stream`]: https://nodejs.org/docs/latest-v18.x/api/stream.html
+
+Files:
+- `xzdec.c` is a thin wrapper around liblzma that exports functions with a
+  WebAssembly-style ABI. It compiles to `xzdec.wasm`.
+- `xzdec.wasm.gz` is a gzip-compressed `xzdec.wasm`, to reduce the size impact
+  of checking generated build artifacts into Git.
+- `xzdec.ts` exports the `decompress(r: stream.Readable, w: stream.Writable)`
+  function, which instantiates a WebAssembly module from `xzdec.wasm.gz` and
+  decompresses an Xz bitstream.
+
+When building a new version of `xzdec.wasm.gz`, or verifying that the checked-in
+artifact matches the expected output, Bazel should be run with `-c opt` so that
+the compiled output is optimized.
+
+```
+$ cd src/infrastructure/xzdec
+$ bazel build -c opt //:xzdec_wasm_gz
+$ diff -s xzdec.wasm.gz bazel-bin/xzdec_wasm_gz/xzdec.wasm.gz
+Files xzdec.wasm.gz and bazel-bin/xzdec_wasm_gz/xzdec.wasm.gz are identical
+$
+```
+
+Note that variations in gzip compression may cause spurious differences between
+the checked-in and freshly built `xzdec.wasm.gz` -- in this case, decompressing
+both files and comparing the `xzdec.wasm` contents directly is more reliable.
diff --git a/src/infrastructure/xzdec/wasm.bzl b/src/infrastructure/xzdec/wasm.bzl
new file mode 100644
index 0000000..ff76b24
--- /dev/null
+++ b/src/infrastructure/xzdec/wasm.bzl
@@ -0,0 +1,86 @@
+_WASM_ABIS = [
+    "wasm32-wasip2",
+]
+
+def _platform_transition(settings, attr):
+    return {"//command_line_option:platforms": str(attr._platform)}
+
+platform_transition = transition(
+    implementation = _platform_transition,
+    inputs = [],
+    outputs = ["//command_line_option:platforms"],
+)
+
+def _wasm_binary(ctx):
+    out = ctx.outputs.out
+    if not out:
+        out = ctx.actions.declare_file(ctx.attr.name + ".wasm")
+    ctx.actions.symlink(output = out, target_file = ctx.file.lib)
+    return DefaultInfo(files = depset([out]))
+
+wasm_binary = rule(
+    implementation = _wasm_binary,
+    attrs = {
+        "lib": attr.label(
+            allow_single_file = True,
+            cfg = platform_transition,
+        ),
+        "out": attr.output(),
+        "_platform": attr.label(
+            default = Label("@toolchains_llvm//platforms:wasm32"),
+        ),
+        "_allowlist_function_transition": attr.label(
+            default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
+        ),
+    },
+)
+
+_SYSROOT_BUILD = """
+filegroup(
+    name = {name},
+    srcs = glob(["include/**/*", "lib/**/*", "share/**/*"], allow_empty=True),
+    visibility = ["//visibility:public"],
+)
+"""
+
+def _wasi_sysroot(ctx):
+    ctx.download_and_extract(
+        integrity = "sha256-NRcvfSeZSFsVpGsdh/UKWF2RXsZiCA8AXZkVOlCIjwg=",
+        stripPrefix = "wasi-sysroot-24.0",
+        url = ["https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-24/wasi-sysroot-24.0.tar.gz"],
+    )
+
+    ctx.file("BUILD.bazel", "")
+    ctx.file("sysroots/BUILD.bazel", "")
+    for abi in _WASM_ABIS:
+        ctx.file("sysroots/%s/BUILD.bazel" % (abi,), _SYSROOT_BUILD.format(
+            name = repr(abi),
+        ))
+        ctx.execute(["mv", "include/" + abi, "sysroots/%s/include" % (abi,)])
+        ctx.execute(["mv", "lib/" + abi, "sysroots/%s/lib" % (abi,)])
+        ctx.execute(["mv", "share/" + abi, "sysroots/%s/share" % (abi,)])
+
+wasi_sysroot = repository_rule(
+    implementation = _wasi_sysroot,
+)
+
+def _wasm32_libclang_rt(ctx):
+    ctx.file("BUILD.bazel", """
+exports_files(["libclang_rt.builtins-wasm32.a"])
+
+filegroup(
+    name = "libclang_rt-wasm32-wasi",
+    srcs = ["libclang_rt.builtins-wasm32.a"],
+    visibility = ["//visibility:public"],
+)
+""")
+
+    ctx.download_and_extract(
+        integrity = "sha256-fjPA33WLkEabHePKFY4tCn9xk01YhFJbpqNy3gs7Dsc=",
+        stripPrefix = "libclang_rt.builtins-wasm32-wasi-24.0",
+        url = ["https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-24/libclang_rt.builtins-wasm32-wasi-24.0.tar.gz"],
+    )
+
+wasm32_libclang_rt = repository_rule(
+    implementation = _wasm32_libclang_rt,
+)
diff --git a/src/infrastructure/xzdec/xzdec.c b/src/infrastructure/xzdec/xzdec.c
new file mode 100644
index 0000000..0f817c1
--- /dev/null
+++ b/src/infrastructure/xzdec/xzdec.c
@@ -0,0 +1,103 @@
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <lzma.h>
+
+typedef uint32_t xzdec_lzma_ret;
+
+struct Xzdec {
+    lzma_stream stream;
+};
+
+__attribute__((export_name("xzdec_allocate")))
+uint8_t *xzdec_allocate(uint32_t len) {
+    return malloc(len);
+}
+
+__attribute__((export_name("xzdec_deallocate")))
+void xzdec_deallocate(uint8_t *ptr) {
+    free(ptr);
+}
+
+// Creates a decoder and stores a pointer to it in `*xzdec_ptr`. On any
+// return value other than LZMA_OK, `*xzdec_ptr` is left untouched.
+__attribute__((export_name("xzdec_new_stream_decoder")))
+xzdec_lzma_ret xzdec_new_stream_decoder(
+    uint32_t memlimit,
+    uint32_t flags,
+    struct Xzdec **xzdec_ptr
+) {
+    lzma_stream stream = LZMA_STREAM_INIT;
+    lzma_ret rc = lzma_stream_decoder(&stream, memlimit, flags);
+    if (rc != LZMA_OK) {
+        return rc;
+    }
+    struct Xzdec *xzdec = malloc(sizeof(struct Xzdec));
+    if (xzdec == NULL) {
+        // Release the decoder state that was just initialized.
+        lzma_end(&stream);
+        return LZMA_MEM_ERROR;
+    }
+    xzdec->stream = stream;
+    *xzdec_ptr = xzdec;
+    return LZMA_OK;
+}
+
+__attribute__((export_name("xzdec_drop")))
+void xzdec_drop(struct Xzdec *xzdec) {
+    lzma_end(&(xzdec->stream));
+    free(xzdec);
+}
+
+__attribute__((export_name("xzdec_input_empty")))
+uint32_t xzdec_input_empty(struct Xzdec *xzdec) {
+    if (xzdec->stream.avail_in == 0) {
+        return 1;
+    }
+    return 0;
+}
+
+__attribute__((export_name("xzdec_set_input")))
+void xzdec_set_input(
+    struct Xzdec *xzdec,
+    const uint8_t *input_buf,
+    uint32_t input_buf_len
+) {
+    xzdec->stream.next_in = input_buf;
+    xzdec->stream.avail_in = input_buf_len;
+}
+
+__attribute__((export_name("xzdec_next_output")))
+xzdec_lzma_ret xzdec_next_output(
+    struct Xzdec *xzdec,
+    uint8_t *output_buf,
+    uint32_t output_buf_cap,
+    uint32_t *output_buf_len
+) {
+    xzdec->stream.next_out = output_buf;
+    xzdec->stream.avail_out = output_buf_cap;
+    lzma_ret rc = lzma_code(&(xzdec->stream), LZMA_RUN);
+    *output_buf_len = output_buf_cap - xzdec->stream.avail_out;
+    return rc;
+}
+
+__attribute__((export_name("xzdec_finish")))
+xzdec_lzma_ret xzdec_finish(
+    struct Xzdec *xzdec,
+    uint8_t *output_buf,
+    uint32_t output_buf_cap,
+    uint32_t *output_buf_len
+) {
+    xzdec->stream.next_out = output_buf;
+    xzdec->stream.avail_out = output_buf_cap;
+    lzma_ret rc = lzma_code(&(xzdec->stream), LZMA_FINISH);
+    *output_buf_len = output_buf_cap - xzdec->stream.avail_out;
+    return rc;
+}
+
+// Prevent Clang from wrapping every inserted function and injecting calls
+// to `__wasm_call_dtors()`.
+void _initialize() {
+    void __wasm_call_ctors();
+    __wasm_call_ctors();
+}
diff --git a/src/infrastructure/xzdec/xzdec.ts b/src/infrastructure/xzdec/xzdec.ts
new file mode 100644
index 0000000..208d150
--- /dev/null
+++ b/src/infrastructure/xzdec/xzdec.ts
@@ -0,0 +1,181 @@
+import { promises as fs } from "node:fs";
+import stream from "node:stream";
+import zlib from "node:zlib";
+
+const LZMA_CONCATENATED = 0x08;
+
+type lzma_ret = number;
+
+const lzma_ret = {
+  OK: 0,
+  STREAM_END: 1,
+  NO_CHECK: 2,
+  UNSUPPORTED_CHECK: 3,
+  GET_CHECK: 4,
+  MEM_ERROR: 5,
+  MEMLIMIT_ERROR: 6,
+  FORMAT_ERROR: 7,
+  OPTIONS_ERROR: 8,
+  DATA_ERROR: 9,
+  BUF_ERROR: 10,
+  PROG_ERROR: 11,
+  SEEK_NEEDED: 12,
+} as const;
+
+const BUF_SIZE = 0x10000; // 64 KiB
+
+// 128 MiB, large enough for archives encoded with `xz -9` plus some
+// extra margin.
+const MEM_LIMIT = 0x8000000;
+
+type ptr = number;
+
+interface xzdec_exports {
+  xzdec_allocate(len: number): ptr;
+  xzdec_deallocate(ptr: ptr): void;
+
+  xzdec_new_stream_decoder(
+    memlimit: number,
+    flags: number,
+    xzdec_ptr: ptr,
+  ): lzma_ret;
+
+  // Declared `void` in xzdec.c.
+  xzdec_drop(xzdec: ptr): void;
+
+  xzdec_input_empty(xzdec: ptr): number;
+
+  xzdec_set_input(xzdec: ptr, input_buf: ptr, input_buf_len: number): void;
+
+  xzdec_next_output(
+    xzdec: ptr,
+    output_buf: ptr,
+    output_buf_cap: number,
+    output_buf_len: ptr,
+  ): lzma_ret;
+
+  xzdec_finish(
+    xzdec: ptr,
+    output_buf: ptr,
+    output_buf_cap: number,
+    output_buf_len: ptr,
+  ): lzma_ret;
+}
+
+// Compiled module, shared by every `decompress()` call.
+let moduleOnce: Promise<WebAssembly.Module> | null = null;
+
+/**
+ * Reads the gzip-compressed WebAssembly binary from disk and compiles it.
+ *
+ * NOTE(review): the path is resolved against the process working
+ * directory, not against this file -- confirm that matches how the
+ * distribution is launched.
+ */
+async function loadXzdec(): Promise<WebAssembly.Module> {
+  const wasmPath = "./infrastructure/xzdec/xzdec.wasm.gz";
+  const wasmGzBytes = await fs.readFile(wasmPath);
+  const wasmBytes = new Uint8Array(zlib.gunzipSync(wasmGzBytes));
+  return await WebAssembly.compile(wasmBytes);
+}
+
+/**
+ * Decompresses the Xz bitstream read from `r` and writes the decoded bytes
+ * to `w`.
+ *
+ * `w` is not ended and backpressure from `w.write()` is not observed;
+ * callers own the lifetime of both streams.
+ *
+ * @throws Error if a buffer cannot be allocated inside the module or the
+ *   decoder reports anything other than success.
+ */
+export async function decompress(
+  r: stream.Readable,
+  w: stream.Writable,
+): Promise<void> {
+  if (moduleOnce === null) {
+    moduleOnce = loadXzdec();
+  }
+
+  const instance = await WebAssembly.instantiate(await moduleOnce, {});
+  const mem = instance.exports.memory as WebAssembly.Memory;
+  const {
+    xzdec_allocate,
+    // xzdec_deallocate,
+    xzdec_new_stream_decoder,
+    // xzdec_drop,
+    xzdec_input_empty,
+    xzdec_set_input,
+    xzdec_next_output,
+    xzdec_finish,
+  } = instance.exports as unknown as xzdec_exports;
+
+  const SCRATCH_SIZE = 8;
+  const scratchPtr = xzdec_allocate(SCRATCH_SIZE);
+  const inputPtr = xzdec_allocate(BUF_SIZE);
+  const outputPtr = xzdec_allocate(BUF_SIZE);
+  if (scratchPtr === 0 || inputPtr === 0 || outputPtr === 0) {
+    throw new Error("xzdec_allocate() failed");
+  }
+
+  // struct scratch {
+  //   struct Xzdec *xzdec;
+  //   uint32_t output_buf_len;
+  // }
+  const xzdecPtr = scratchPtr;
+  const outputLenPtr = scratchPtr + 4;
+
+  // Copies `len` decoded bytes out of the module's memory before handing
+  // them to `w`: the output buffer is overwritten by the next decoder
+  // call, while `w` may hold on to the chunk it was given.
+  const emit = (len: number): void => {
+    if (len > 0) {
+      w.write(Buffer.from(new Uint8Array(mem.buffer, outputPtr, len)));
+    }
+  };
+
+  const flags = LZMA_CONCATENATED;
+  let rc = xzdec_new_stream_decoder(MEM_LIMIT, flags, xzdecPtr);
+  if (rc !== lzma_ret.OK) {
+    throw new Error(`xzdec_new_stream_decoder() failed: lzma_ret(${rc})`);
+  }
+  const xzdec = peekU32(mem, xzdecPtr);
+
+  for await (let chunk of r) {
+    while (chunk.length > 0) {
+      if (xzdec_input_empty(xzdec) === 1) {
+        const input = chunk.subarray(0, BUF_SIZE);
+        chunk = chunk.subarray(BUF_SIZE);
+        new Uint8Array(mem.buffer, inputPtr, input.length).set(input);
+        xzdec_set_input(xzdec, inputPtr, input.length);
+      }
+
+      while (xzdec_input_empty(xzdec) === 0) {
+        rc = xzdec_next_output(xzdec, outputPtr, BUF_SIZE, outputLenPtr);
+        if (rc !== lzma_ret.OK) {
+          throw new Error(`xzdec_next_output() failed: lzma_ret(${rc})`);
+        }
+        emit(peekU32(mem, outputLenPtr));
+      }
+    }
+  }
+
+  // Keep draining until the decoder reports the end of the stream: a
+  // single call yields at most BUF_SIZE bytes and returns OK while more
+  // output is still pending.
+  do {
+    rc = xzdec_finish(xzdec, outputPtr, BUF_SIZE, outputLenPtr);
+    if (rc !== lzma_ret.OK && rc !== lzma_ret.STREAM_END) {
+      throw new Error(`xzdec_finish() failed: lzma_ret(${rc})`);
+    }
+    emit(peekU32(mem, outputLenPtr));
+  } while (rc !== lzma_ret.STREAM_END);
+
+  // Be lazy and let the entire module get garbage-collected, instead of
+  // deallocating buffers.
+}
+
+/** Reads the little-endian `uint32_t` stored at byte offset `addr`. */
+function peekU32(mem: WebAssembly.Memory, addr: ptr): number {
+  return new DataView(mem.buffer).getUint32(addr, true);
+}
diff --git a/src/infrastructure/xzdec/xzdec.wasm.gz b/src/infrastructure/xzdec/xzdec.wasm.gz
new file mode 100644
index 0000000..c3c457b
Binary files /dev/null and b/src/infrastructure/xzdec/xzdec.wasm.gz differ
diff --git a/tools/copy-dist-files.js b/tools/copy-dist-files.js
index beec9b8..8613f89 100644
--- a/tools/copy-dist-files.js
+++ b/tools/copy-dist-files.js
@@ -9,7 +9,11 @@ async function main() {
 }
 
 function getDistributableFiles() {
-  return globbySync(["package.json", "yarn.lock"], {
+  return globbySync([
+    "package.json",
+    "yarn.lock",
+    "src/infrastructure/xzdec/xzdec.wasm.gz",
+  ], {
     cwd: PROJECT_DIR,
   });
 }
@@ -18,7 +22,8 @@ function copyFilesToDist(files) {
   console.info(`Copying extra files to ${DIST_DIR}`);
   files.forEach((file) => {
     const src = path.join(PROJECT_DIR, file);
-    const dest = path.join(DIST_DIR, "publish-to-bcr", file);
+    const distFile = file.replace(/^src\//, "");
+    const dest = path.join(DIST_DIR, "publish-to-bcr", distFile);
     console.info(` => ${file}`);
     fs.mkdirSync(path.dirname(dest), { recursive: true });
     fs.copyFileSync(src, dest);
diff --git a/tsconfig.json b/tsconfig.json
index ef290ae..087175f 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -14,5 +14,5 @@
     "types": ["jest", "node"]
   },
   "include": ["src/**/*.ts"],
-  "exclude": ["node_modules", "src/**/*.spec.ts", "src/test"]
+  "exclude": ["node_modules", "src/**/*.spec.ts", "src/test", "src/**/bazel-*"]
 }