From 13e319b4131a4eec01d3b17a3a3d64e917127b46 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Thu, 31 Oct 2024 11:58:28 +0100 Subject: [PATCH] Map current user to root in subuid user namespace By mapping the current user to root in the subuid user namespace, we don't have to change the ownership of all the files in the directory tree to root in the subuid uid/gid range. This means that on btrfs filesystems, we can do a subvolume snapshot instead of an expensive full tree recursion to copy each file individually. --- mkosi/qemu.py | 19 ++++++++++--------- mkosi/user.py | 31 ++++++++++++------------------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/mkosi/qemu.py b/mkosi/qemu.py index f21760c817..52a01949fa 100644 --- a/mkosi/qemu.py +++ b/mkosi/qemu.py @@ -565,9 +565,11 @@ def copy_ephemeral(config: Config, src: Path) -> Iterator[Path]: yield src return - # If we're booting a directory image that was not built as root, we have to make an ephemeral copy so - # that we can ensure the files in the directory are either owned by the actual root user or a fake one in - # a subuid user namespace which we'll run virtiofsd as. + # If we're booting a directory image that was not built as root, we have to make an ephemeral copy. If we're + # running as root, we have to make an ephemeral copy so that all the files in the directory tree are also owned by + # root. If we're not running as root, we'll be making use of a subuid/subgid user namespace and we don't want any + # leftover files from the subuid/subgid user namespace to remain after we shut down the container or virtual + # machine. if not config.ephemeral and (config.output_format != OutputFormat.directory or src.stat().st_uid == 0): with flock_or_die(src): yield src @@ -583,9 +585,7 @@ def copy_ephemeral(config: Config, src: Path) -> Iterator[Path]: try: def copy() -> None: - if config.output_format == OutputFormat.directory: - become_root_in_subuid_range() - elif config.output_format in (OutputFormat.disk, OutputFormat.esp): + if config.output_format in (OutputFormat.disk, OutputFormat.esp): attr = run( ["lsattr", "-l", workdir(src)], sandbox=config.sandbox(binary="lsattr", options=["--ro-bind", src, workdir(src)]), @@ -599,9 +599,10 @@ def copy() -> None: copy_tree( src, tmp, - # Make sure the ownership is changed to the (fake) root user if the directory was not built - # as root. - preserve=config.output_format == OutputFormat.directory and src.stat().st_uid == 0, + preserve=( + config.output_format == OutputFormat.directory + and (os.getuid() != 0 or src.stat().st_uid == 0) + ), use_subvolumes=config.use_subvolumes, sandbox=config.sandbox, ) diff --git a/mkosi/user.py b/mkosi/user.py index c74523e335..aadf8d9f60 100644 --- a/mkosi/user.py +++ b/mkosi/user.py @@ -112,8 +112,8 @@ def become_root_in_subuid_range() -> None: """ Set up a new user namespace mapping using /etc/subuid and /etc/subgid. - The current process becomes the root user in the new user namespace and the current user and group will - be mapped to 65436. The other IDs will be mapped through. + The current user is mapped to root and the current process becomes the root user in the new user + namespace. The other IDs will be mapped through. """ if os.getuid() == 0: return @@ -128,21 +128,18 @@ def become_root_in_subuid_range() -> None: # We map the private UID range configured in /etc/subuid and /etc/subgid into the user namespace # using newuidmap and newgidmap. On top of that, we also make sure to map in the user running mkosi - # so that we can access files and directories from the current user from within the user - # namespace. We don't map to the last user in the range as the last user is sometimes used in tests - # as a default value and mapping to that user might break those tests. + # to root so that we can access files and directories from the current user from within the user + # namespace. newuidmap = [ "flock", "--exclusive", "--close", lock, "newuidmap", pid, - 0, subuid, SUBRANGE - 100, - SUBRANGE - 100, os.getuid(), 1, - SUBRANGE - 100 + 1, subuid + SUBRANGE - 100 + 1, 99 + 0, os.getuid(), 1, + 1, subuid + 1, SUBRANGE - 1, ] # fmt: skip newgidmap = [ "flock", "--exclusive", "--close", lock, "newgidmap", pid, - 0, subgid, SUBRANGE - 100, - SUBRANGE - 100, os.getgid(), 1, - SUBRANGE - 100 + 1, subgid + SUBRANGE - 100 + 1, 99 + 0, os.getgid(), 1, + 1, subgid + 1, SUBRANGE - 1, ] # fmt: skip # newuidmap and newgidmap have to run from outside the user namespace to be able to assign a uid @@ -162,8 +159,6 @@ def become_root_in_subuid_range() -> None: uidmap.wait() gidmap.wait() - # By default, we're root in the user namespace because if we were our current user by default, we - # wouldn't be able to chown stuff to be owned by root while the reverse is possible. os.setresuid(0, 0, 0) os.setresgid(0, 0, 0) os.setgroups([0]) @@ -180,12 +175,10 @@ def become_root_in_subuid_range_cmd() -> list[str]: "unshare", "--setuid", "0", "--setgid", "0", - "--map-users", f"0:{subuid}:{SUBRANGE - 100}", - "--map-users", f"{SUBRANGE - 100}:{os.getuid()}:1", - "--map-users", f"{SUBRANGE - 100 + 1}:{subuid + SUBRANGE - 100 + 1}:99", - "--map-groups", f"0:{subgid}:{SUBRANGE - 100}", - "--map-groups", f"{SUBRANGE - 100}:{os.getgid()}:1", - "--map-groups", f"{SUBRANGE - 100 + 1}:{subgid + SUBRANGE - 100 + 1}:99", + "--map-users", f"0:{os.getuid()}:1", + "--map-users", f"1:{subuid + 1}:{SUBRANGE - 1}", + "--map-groups", f"0:{os.getgid()}:1", + "--map-groups", f"1:{subgid + 1}:{SUBRANGE - 1}", "--keep-caps", ] # fmt: skip