Skip to content

Commit

Permalink
Map current user to root in subuid user namespace
Browse files (browse the repository at this point in the history)
By mapping the current user to root in the subuid user namespace,
we don't have to change the ownership of all the files in the directory
tree to root in the subuid uid/gid range. This means that on btrfs
filesystems, we can take a subvolume snapshot instead of doing an
expensive recursive copy of every file in the tree.
  • Loading branch information
DaanDeMeyer committed Oct 31, 2024
1 parent b311778 commit 13e319b
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 28 deletions.
19 changes: 10 additions & 9 deletions mkosi/qemu.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,9 +565,11 @@ def copy_ephemeral(config: Config, src: Path) -> Iterator[Path]:
yield src
return

# If we're booting a directory image that was not built as root, we have to make an ephemeral copy so
# that we can ensure the files in the directory are either owned by the actual root user or a fake one in
# a subuid user namespace which we'll run virtiofsd as.
# If we're booting a directory image that was not built as root, we have to make an ephemeral copy. If we're
# running as root, we have to make an ephemeral copy so that all the files in the directory tree are also owned by
# root. If we're not running as root, we'll be making use of a subuid/subgid user namespace and we don't want any
# leftover files from the subuid/subgid user namespace to remain after we shut down the container or virtual
# machine.
if not config.ephemeral and (config.output_format != OutputFormat.directory or src.stat().st_uid == 0):
with flock_or_die(src):
yield src
Expand All @@ -583,9 +585,7 @@ def copy_ephemeral(config: Config, src: Path) -> Iterator[Path]:
try:

def copy() -> None:
if config.output_format == OutputFormat.directory:
become_root_in_subuid_range()
elif config.output_format in (OutputFormat.disk, OutputFormat.esp):
if config.output_format in (OutputFormat.disk, OutputFormat.esp):
attr = run(
["lsattr", "-l", workdir(src)],
sandbox=config.sandbox(binary="lsattr", options=["--ro-bind", src, workdir(src)]),
Expand All @@ -599,9 +599,10 @@ def copy() -> None:
copy_tree(
src,
tmp,
# Make sure the ownership is changed to the (fake) root user if the directory was not built
# as root.
preserve=config.output_format == OutputFormat.directory and src.stat().st_uid == 0,
preserve=(
config.output_format == OutputFormat.directory
and (os.getuid() != 0 or src.stat().st_uid == 0)
),
use_subvolumes=config.use_subvolumes,
sandbox=config.sandbox,
)
Expand Down
31 changes: 12 additions & 19 deletions mkosi/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ def become_root_in_subuid_range() -> None:
"""
Set up a new user namespace mapping using /etc/subuid and /etc/subgid.
The current process becomes the root user in the new user namespace and the current user and group will
be mapped to 65436. The other IDs will be mapped through.
The current user is mapped to root and the current process becomes the root user in the new user
namespace. The other IDs will be mapped through.
"""
if os.getuid() == 0:
return
Expand All @@ -128,21 +128,18 @@ def become_root_in_subuid_range() -> None:

# We map the private UID range configured in /etc/subuid and /etc/subgid into the user namespace
# using newuidmap and newgidmap. On top of that, we also make sure to map in the user running mkosi
# so that we can access files and directories from the current user from within the user
# namespace. We don't map to the last user in the range as the last user is sometimes used in tests
# as a default value and mapping to that user might break those tests.
# to root so that we can access files and directories from the current user from within the user
# namespace.
newuidmap = [
"flock", "--exclusive", "--close", lock, "newuidmap", pid,
0, subuid, SUBRANGE - 100,
SUBRANGE - 100, os.getuid(), 1,
SUBRANGE - 100 + 1, subuid + SUBRANGE - 100 + 1, 99
0, os.getuid(), 1,
1, subuid + 1, SUBRANGE - 1,
] # fmt: skip

newgidmap = [
"flock", "--exclusive", "--close", lock, "newgidmap", pid,
0, subgid, SUBRANGE - 100,
SUBRANGE - 100, os.getgid(), 1,
SUBRANGE - 100 + 1, subgid + SUBRANGE - 100 + 1, 99
0, os.getgid(), 1,
1, subgid + 1, SUBRANGE - 1,
] # fmt: skip

# newuidmap and newgidmap have to run from outside the user namespace to be able to assign a uid
Expand All @@ -162,8 +159,6 @@ def become_root_in_subuid_range() -> None:
uidmap.wait()
gidmap.wait()

# By default, we're root in the user namespace because if we were our current user by default, we
# wouldn't be able to chown stuff to be owned by root while the reverse is possible.
os.setresuid(0, 0, 0)
os.setresgid(0, 0, 0)
os.setgroups([0])
Expand All @@ -180,12 +175,10 @@ def become_root_in_subuid_range_cmd() -> list[str]:
"unshare",
"--setuid", "0",
"--setgid", "0",
"--map-users", f"0:{subuid}:{SUBRANGE - 100}",
"--map-users", f"{SUBRANGE - 100}:{os.getuid()}:1",
"--map-users", f"{SUBRANGE - 100 + 1}:{subuid + SUBRANGE - 100 + 1}:99",
"--map-groups", f"0:{subgid}:{SUBRANGE - 100}",
"--map-groups", f"{SUBRANGE - 100}:{os.getgid()}:1",
"--map-groups", f"{SUBRANGE - 100 + 1}:{subgid + SUBRANGE - 100 + 1}:99",
"--map-users", f"0:{os.getuid()}:1",
"--map-users", f"1:{subuid + 1}:{SUBRANGE - 1}",
"--map-groups", f"0:{os.getgid()}:1",
"--map-groups", f"1:{subgid + 1}:{SUBRANGE - 1}",
"--keep-caps",
] # fmt: skip

Expand Down

0 comments on commit 13e319b

Please sign in to comment.