Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add Support for Symlinks in tar Rule's Runfiles Handling #1036

Merged
merged 9 commits into from
Jan 31, 2025
Merged
5 changes: 4 additions & 1 deletion docs/tar.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

85 changes: 84 additions & 1 deletion lib/private/modify_mtree.awk
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,88 @@
if (package_dir != "") {
sub(/^/, package_dir "/")
}
print;
if (preserve_symlinks != "") {
# By default Bazel reports symlinks as regular file/dir therefore mtree_spec has no way of knowing that a file
# is a symlink. This is a problem when we want to preserve symlinks especially for symlink sensitive applications
# such as nodejs with pnpm. To work around this we need to determine if a file is a symlink and, if so, we need to
# determine where the symlink points to by calling readlink repeatedly until we get the final destination.
#
# We then need to decide if it's a symlink based on how many times we had to call readlink and where we ended up.
#
# Unlike Bazel's own symlinks, which point out of the sandbox, symlinks created by ctx.actions.symlink
# stay within the Bazel sandbox, so it is possible to detect them.
#
# See https://github.com/bazelbuild/rules_pkg/pull/609

symlink = ""
if ($0 ~ /type=file/ && $0 ~ /content=/) {
match($0, /content=[^ ]+/)
content_field = substr($0, RSTART, RLENGTH)
split(content_field, parts, "=")
path = parts[2]
# Store paths for look up
symlink_map[path] = $1
# Resolve the symlink if it exists
resolved_path = ""
cmd = "readlink -f \"" path "\""
cmd | getline resolved_path
close(cmd)
# If readlink -f fails use readlink for relative links
if (resolved_path == "") {
cmd = "readlink \"" path "\""
cmd | getline resolved_path
close(cmd)
}


if (resolved_path) {
if (resolved_path ~ bin_dir || resolved_path ~ /\.\.\//) {
# Strip down the resolved path to start from bin_dir
sub("^.*" bin_dir, bin_dir, resolved_path)
# If the resolved path is different from the original path,
# or if it's a relative path
if (path != resolved_path || resolved_path ~ /\.\.\//) {
symlink = resolved_path
}
}
}
}
if (symlink != "") {
line_array[NR] = $1 SUBSEP resolved_path
}
else {
line_array[NR] = $0 # Store other lines too, with an empty path
}
}

else {

print; # Print immediately if symlinks are not preserved

}
}
END {
    # When symlinks are preserved, the main rule buffers every record in
    # line_array instead of printing it, and records each content path in
    # symlink_map. Output is produced here, once symlink_map holds the
    # entries of all records.
    if (preserve_symlinks != "") {
        for (i = 1; i <= NR; i++) {
            # Only records that reached the buffering code have an entry.
            # Skip the others so they do not turn into blank output lines
            # (a plain read of line_array[i] would also create the element).
            if (!(i in line_array)) {
                continue
            }
            line = line_array[i]
            if (index(line, SUBSEP) > 0) {
                # Symlink entry, stored as: <mtree path> SUBSEP <resolved target>
                split(line, fields, SUBSEP)
                field0 = fields[1]
                resolved_path = fields[2]
                if (resolved_path in symlink_map) {
                    # The target is itself an entry of this spec: link to the
                    # path it has inside the archive.
                    linked_to = symlink_map[resolved_path]
                } else {
                    # Unknown target: keep the resolved path as-is.
                    linked_to = resolved_path
                }
                print field0 " type=link link=" linked_to
            } else {
                # Not a symlink: emit the buffered line unchanged.
                print line
            }
        }
    }
}
88 changes: 88 additions & 0 deletions lib/private/tar.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,43 @@ _mtree_attrs = {
"srcs": attr.label_list(doc = "Files that are placed into the tar", allow_files = True),
"out": attr.output(doc = "Resulting specification file to write"),
}
# Attributes of the `mtree_mutate` rule.
#
# `mtree` and `out` are mandatory because the implementation dereferences both
# unconditionally (`ctx.file.mtree.path`, `ctx.outputs.out.path`); every other
# attribute is optional and only forwarded to the AWK script when set.
_mutate_mtree_attrs = {
    "mtree": attr.label(
        allow_single_file = True,
        mandatory = True,
        doc = "Specifies the path to the mtree file, which describes the directory structure and metadata for the tar file. Must be a single file.",
    ),
    "awk_script": attr.label(
        allow_single_file = True,
        default = "@aspect_bazel_lib//lib/private:modify_mtree.awk",
        doc = "Path to an AWK script used to modify the mtree file. By default, it uses the modify_mtree.awk script.",
    ),
    "srcs": attr.label_list(
        allow_files = True,
        doc = "Files, directories, or other targets whose default outputs will be used to create symlinks",
    ),
    "preserve_symlinks": attr.bool(
        default = False,
        doc = "If True, symbolic links in the source files are preserved in the tar file. If False, the links are resolved to their actual targets.",
    ),
    "strip_prefix": attr.string(
        doc = "A prefix to strip from the paths of files and directories when they are added to the tar file.",
    ),
    "package_dir": attr.string(
        doc = "Specifies a base directory within the tar file where all files will be placed. Sets the root directory for the tar contents.",
    ),
    "mtime": attr.string(
        doc = "Specifies the modification time (mtime) to be applied to all files in the tar file. Used for deterministic builds.",
    ),
    "owner": attr.string(
        doc = "Specifies the numeric user ID (UID) for the owner of the files in the tar archive.",
    ),
    "ownername": attr.string(
        doc = "Specifies the name of the owner of the files in the tar archive. Used alongside 'owner'.",
    ),
    "out": attr.output(
        mandatory = True,
        doc = "The output of the mutation, a new mtree file.",
    ),
}

def _add_compression_args(compress, args):
if compress == "bzip2":
Expand Down Expand Up @@ -446,6 +483,57 @@ def _mtree_impl(ctx):

return DefaultInfo(files = depset([out]), runfiles = ctx.runfiles([out]))

def _mtree_mutate_impl(ctx):
    """Runs the AWK script over the input mtree and writes the mutated mtree.

    Every optional attribute that is set is forwarded to awk as a
    `-v name=value` variable assignment. The flag and the assignment are added
    as two separate arguments and expanded with a quoted `"$@"`, so values
    containing whitespace or glob characters reach awk intact instead of being
    re-split by the shell.

    Args:
        ctx: the rule context.

    Returns:
        A list holding one DefaultInfo whose files are the mutated mtree.
    """
    srcs_runfiles = [
        src[DefaultInfo].default_runfiles.files
        for src in ctx.attr.srcs
    ]
    out_mtree = ctx.outputs.out

    args = ctx.actions.args()

    # Use bin directory to determine if symlink is within or outside the sandbox
    args.add("-v", "bin_dir={}".format(ctx.bin_dir.path))

    # String attributes map one-to-one onto awk variables of the same name.
    for name in ["owner", "ownername", "strip_prefix", "package_dir", "mtime"]:
        value = getattr(ctx.attr, name)
        if value:
            args.add("-v", "{}={}".format(name, value))
    if ctx.attr.preserve_symlinks:
        args.add("-v", "preserve_symlinks=1")

    # The script and the mtree are read by awk; srcs (and their runfiles) must
    # be present so the script can inspect them with readlink.
    inputs = ctx.files.srcs + [ctx.file.mtree, ctx.file.awk_script]

    ctx.actions.run_shell(
        command = """
            awk "$@" -f '{awk_script}' '{mtree}' > '{out_mtree}'
        """.format(
            awk_script = ctx.file.awk_script.path,
            mtree = ctx.file.mtree.path,
            out_mtree = out_mtree.path,
        ),
        arguments = [args],
        inputs = depset(
            direct = inputs,
            transitive = srcs_runfiles,
        ),
        outputs = [out_mtree],
    )

    return [DefaultInfo(files = depset([out_mtree]))]

# Rule that rewrites an mtree specification by running an AWK script over it.
# Behaviour is implemented by `_mtree_mutate_impl`; the accepted attributes
# are declared in `_mutate_mtree_attrs`.
mtree_mutate = rule(
implementation = _mtree_mutate_impl,
attrs = _mutate_mtree_attrs,
)

tar_lib = struct(
attrs = _tar_attrs,
implementation = _tar_impl,
Expand Down
40 changes: 22 additions & 18 deletions lib/tar.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ TODO:
load("@bazel_skylib//lib:types.bzl", "types")
load("//lib:expand_template.bzl", "expand_template")
load("//lib:utils.bzl", "propagate_common_rule_attributes")
load("//lib/private:tar.bzl", _tar = "tar", _tar_lib = "tar_lib")
load("//lib/private:tar.bzl", _mutate_mtree = "mtree_mutate", _tar = "tar", _tar_lib = "tar_lib")

mtree_spec = rule(
doc = "Create an mtree specification to map a directory hierarchy. See https://man.freebsd.org/cgi/man.cgi?mtree(8)",
Expand Down Expand Up @@ -137,6 +137,8 @@ def tar(name, mtree = "auto", stamp = 0, **kwargs):
def mtree_mutate(
name,
mtree,
srcs = None,
preserve_symlinks = False,
strip_prefix = None,
package_dir = None,
mtime = None,
Expand All @@ -149,6 +151,8 @@ def mtree_mutate(
Args:
name: name of the target, output will be `[name].mtree`.
mtree: input mtree file, typically created by `mtree_spec`.
srcs: list of files to resolve symlinks for.
preserve_symlinks: `EXPERIMENTAL!` We may remove or change it at any point without further notice. Flag to determine whether to preserve symlinks in the tar.
strip_prefix: prefix to remove from all paths in the tar. Files and directories not under this prefix are dropped.
package_dir: directory prefix to add to all paths in the tar.
mtime: new modification time for all entries.
Expand All @@ -157,23 +161,23 @@ def mtree_mutate(
awk_script: may be overridden to change the script containing the modification logic.
**kwargs: additional named parameters to genrule
"""
vars = []
if strip_prefix:
vars.append("-v strip_prefix='{}'".format(strip_prefix))
if package_dir:
vars.append("-v package_dir='{}'".format(package_dir))
if mtime:
vars.append("-v mtime='{}'".format(mtime))
if owner:
vars.append("-v owner='{}'".format(owner))
if ownername:
vars.append("-v ownername='{}'".format(ownername))

native.genrule(
if preserve_symlinks and not srcs:
fail("preserve_symlinks requires srcs to be set in order to resolve symlinks")

# Check if srcs is of type list
if srcs and not types.is_list(srcs):
srcs = [srcs]
_mutate_mtree(
name = name,
srcs = [mtree],
outs = [name + ".mtree"],
cmd = "awk {} -f $(execpath {}) <$< >$@".format(" ".join(vars), awk_script),
tools = [awk_script],
mtree = mtree,
srcs = srcs,
preserve_symlinks = preserve_symlinks,
strip_prefix = strip_prefix,
package_dir = package_dir,
mtime = str(mtime) if mtime else None,
owner = owner,
ownername = ownername,
awk_script = awk_script,
out = "{}.mtree".format(name),
**kwargs
)
Loading
Loading