Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

storage: Enable confidential ephemeral volumes #230

Draft
wants to merge 3 commits into
base: msft-main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/agent/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/agent/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ serde_json = "1.0.39"
scan_fmt = "0.2.3"
scopeguard = "1.0.0"
thiserror = "1.0.26"
rand = "0.8.5"
regex = "1.10.4"
serial_test = "0.5.1"
kata-sys-util = { path = "../libs/kata-sys-util" }
Expand Down
223 changes: 223 additions & 0 deletions src/agent/src/storage/encryption.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
use std::env::temp_dir;
use std::fs::File;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::Command;

use anyhow::{anyhow, Result};
use rand::{distributions::Alphanumeric, Rng};
use slog::Logger;
use tracing::instrument;

// encrypt_device encrypts and formats a device, then returns the path
// of the newly-created dm-crypt device.
#[instrument]
pub fn encrypt_device(logger: &Logger, device_path: &Path) -> Result<PathBuf> {
// Path to the key file that will be passed to the cryptsetup
// commands.
let key_file_path = {
let random_string: String = rand::thread_rng()
.sample_iter(&Alphanumeric)
.take(5)
.map(char::from)
.collect();
let filename = format!("encrypted_storage_key_{}", random_string);
temp_dir().join(filename)
};

// Generate a random encryption key and write it to the key file.
let mut key = vec![0u8; 4096];
rand::thread_rng().fill(&mut key[..]);
let mut key_file = File::create(&key_file_path)?;
key_file.write_all(&key)?;

// Name of the devmapper that will live under /dev/mapper/.
let devmapper_device_name = device_path
.file_name()
.ok_or_else(|| anyhow!("invalid path"))?
.to_string_lossy()
.into_owned();

let script_path: PathBuf = temp_dir().join("luks-encrypt-storage.sh");
if !script_path.exists() {
let mut script_file = File::create(&script_path)?;
script_file.write_all(LUKS_ENCRYPT_STORAGE_SCRIPT.as_bytes())?;
}

info!(logger, "Running luks-encrypt-storage.sh");
let output = Command::new("bash")
.args([
script_path.display().to_string(),
device_path.display().to_string(), // device_path
devmapper_device_name.to_string(), // opened_device_name
"false".to_string(), // is_encrypted (false so the script encrypts it)
key_file_path.display().to_string(), // storage_key_path
"true".to_string(), // data_integrity
])
.output()?;
if !output.status.success() {
info!(logger, "Failed to run luks-encrypt-storage.sh";
"status" => output.status.code().unwrap_or(-1),
"stdout" => String::from_utf8_lossy(&output.stdout).to_string(),
"stderr" => String::from_utf8_lossy(&output.stderr).to_string(),
);
assert!(output.status.success());
}

// We're now mounting from the dm-crypt device, not the original
// device (now ciphertext), so we return the devmapper device.
let devmapper_device_path = PathBuf::from(format!("/dev/mapper/{devmapper_device_name}"));
Ok(devmapper_device_path)
}

// Bash script that LUKS-formats a block device, opens it under /dev/mapper/
// and creates an ext4 filesystem on the opened device.
//
// Positional arguments read by the script:
//   1. device_path         block device to format (required)
//   2. opened_device_name  name of the mapping under /dev/mapper/ (required)
//   3. is_encrypted        "false" makes the script run luksFormat first
//                          (script default: "false")
//   4. storage_key_path    key file, removed by the script after luksOpen
//                          (script default: "/run/encrypt_storage.key")
//   5. data_integrity      "false" skips the dm-integrity setup
//                          (script default: "true")
//
// The literal begins with a newline, so the shebang is not on the first line
// of the script; it has to be run through `bash` explicitly.
//
// Reference: https://github.com/confidential-containers/guest-components/blob/main/confidential-data-hub/storage/scripts/luks-encrypt-storage
static LUKS_ENCRYPT_STORAGE_SCRIPT: &str = r#"
#!/bin/bash
#
# Copyright (c) 2022 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#

set -o errexit
set -o nounset
set -o pipefail
set -o errtrace

#[ -n "${DEBUG:-}" ] && set -o xtrace
set -o xtrace

handle_error() {
local exit_code="${?}"
local line_number="${1:-}"
echo "error:"
echo "Failed at $line_number: ${BASH_COMMAND}"
exit "${exit_code}"
}
trap 'handle_error $LINENO' ERR

die()
{
local msg="$*"
echo >&2 "ERROR: $msg"
exit 1
}

setup()
{
local cmds=()

cmds+=("cryptsetup" "mkfs.ext4" "mount")

local cmd
for cmd in "${cmds[@]}"
do
command -v "$cmd" &>/dev/null || die "need command: '$cmd'"
done
}

setup

device_path=${1:-}
if [ -z "$device_path" ]; then
die "invalid arguments, at least one param for device path"
fi

opened_device_name=${2:-}
if [ -z "$opened_device_name" ]; then
die "invalid arguments, missing param for opened device name"
fi

is_encrypted="false"
if [ -n "${3-}" ]; then
is_encrypted="$3"
fi

storage_key_path="/run/encrypt_storage.key"
if [ -n "${4-}" ]; then
storage_key_path="$4"
fi

data_integrity="true"
if [ -n "${5-}" ]; then
data_integrity="$5"
fi

if [[ -b "$device_path" ]]; then

if [ "$is_encrypted" == "false" ]; then
echo >&2 "is_encrypted=false branch"

if [ "$data_integrity" == "false" ]; then
echo >&2 "integ=false branch"
cryptsetup --verbose --debug --batch-mode luksFormat --type luks2 "$device_path" --sector-size 4096 \
--cipher aes-xts-plain64 "$storage_key_path"
else
echo >&2 "integ=true branch"
# Wiping a device is a time consuming operation. To avoid a full wipe, integritysetup
# and crypt setup provide a --no-wipe option.
# However, an integrity device that is not wiped will have invalid checksums. Normally
# this should not be a problem since a page must first be written to before it can be read
# (otherwise the data would be arbitrary). The act of writing would populate the checksum
# for the page.
# However, tools like mkfs.ext4 read pages before they are written; sometimes the read
# of an unwritten page happens due to kernel buffering.
# See https://gitlab.com/cryptsetup/cryptsetup/-/issues/525 for explanation and fix.
# The way to propery format the non-wiped dm-integrity device is to figure out which pages
# mkfs.ext4 will write to and then to write to those pages before hand so that they will
# have valid integrity tags.
cryptsetup --verbose --debug --batch-mode luksFormat --type luks2 "$device_path" --sector-size 4096 \
--cipher aes-xts-plain64 --integrity hmac-sha256 "$storage_key_path" \
--integrity-no-wipe
fi
fi

cryptsetup luksOpen -d "$storage_key_path" "$device_path" "$opened_device_name"
rm "$storage_key_path"

if [ "$data_integrity" == "false" ]; then
mkfs.ext4 "/dev/mapper/$opened_device_name" -E lazy_journal_init
else
# mkfs.ext4 doesn't perform whole sector writes and this will cause checksum failures
# with an unwiped integrity device. Therefore, first perform a dry run.
output=$(mkfs.ext4 "/dev/mapper/$opened_device_name" -F -n)

# The above command will produce output like
# mke2fs 1.46.5 (30-Dec-2021)
# Creating filesystem with 268435456 4k blocks and 67108864 inodes
# Filesystem UUID: 4a5ff012-91c0-47d9-b4bb-8f83e830825f
# Superblock backups stored on blocks:
# 32768, 98304, 163840, 229376, 294912, 819200, 884736, 1605632, 2654208,
# 4096000, 7962624, 11239424, 20480000, 23887872, 71663616, 78675968,
# 102400000, 214990848
delimiter="Superblock backups stored on blocks:"
blocks_list=$([[ $output =~ $delimiter(.*) ]] && echo "${BASH_REMATCH[1]}")

# Find list of blocks
block_nums=$(echo "$blocks_list" | grep -Eo '[0-9]{4,}' | sort -n)

if [ -z "$block_nums" ]; then
die "Block numbers not found"
fi

# Add zero to list of blocks
block_nums="0 $block_nums"

# Iterate through each block and write to it to ensure that it has valid checksum
for block_num in $block_nums
do
echo "Clearing page at $block_num"
# Zero out the page
dd if=/dev/zero bs=4k count=1 oflag=direct \
of="/dev/mapper/$opened_device_name" seek="$block_num"
done

# Now perform the actual ext4 format. Use lazy_journal_init so that the journal is
# initialized on demand. This is safe for ephemeral storage since we don't expect
# ephemeral storage to survice a power cycle.
mkfs.ext4 "/dev/mapper/$opened_device_name" -E lazy_journal_init
fi
else
die "Invalid device: '$device_path'"
fi
"#;
21 changes: 18 additions & 3 deletions src/agent/src/storage/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ use zerocopy::AsBytes;
use self::bind_watcher_handler::BindWatcherHandler;
use self::block_handler::{PmemHandler, ScsiHandler, VirtioBlkMmioHandler, VirtioBlkPciHandler};
use self::ephemeral_handler::EphemeralHandler;
use self::fs_handler::{OverlayfsHandler, Virtio9pHandler, VirtioFsHandler, SMBHandler};
use self::fs_handler::{OverlayfsHandler, SMBHandler, Virtio9pHandler, VirtioFsHandler};
use self::local_handler::LocalHandler;
use crate::device::{
DRIVER_9P_TYPE, DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE, DRIVER_EPHEMERAL_TYPE,
DRIVER_LOCAL_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_OVERLAYFS_TYPE, DRIVER_SCSI_TYPE,
DRIVER_VIRTIOFS_TYPE, DRIVER_WATCHABLE_BIND_TYPE, DRIVER_SMB_TYPE,
DRIVER_SMB_TYPE, DRIVER_VIRTIOFS_TYPE, DRIVER_WATCHABLE_BIND_TYPE,
};
use crate::mount::{baremount, is_mounted, remove_mounts};
use crate::sandbox::Sandbox;
Expand All @@ -39,6 +39,7 @@ pub use self::ephemeral_handler::update_ephemeral_mounts;

mod bind_watcher_handler;
mod block_handler;
mod encryption;
mod ephemeral_handler;
mod fs_handler;
mod local_handler;
Expand Down Expand Up @@ -380,8 +381,22 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
"mount-options" => options.as_str(),
);

let confidential = storage
.driver_options
.contains(&"confidential=true".to_string());
let ephemeral = storage
.driver_options
.contains(&"ephemeral=true".to_string());

let src_path = if confidential && ephemeral {
sprt marked this conversation as resolved.
Show resolved Hide resolved
// TODO: Call into the CDH instead after we've synced with upstream.
encryption::encrypt_device(&logger, src_path)?
} else {
src_path.to_path_buf()
};

baremount(
src_path,
&src_path,
mount_path,
storage.fstype.as_str(),
flags,
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/pkg/direct-volume/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ const (
FSGroupMetadataKey = "fsGroup"
FSGroupChangePolicyMetadataKey = "fsGroupChangePolicy"
SensitiveMountOptions = "sensitiveMountOptions"
ConfidentialMetadataKey = "confidential"
EphemeralMetadataKey = "ephemeral"
)

// FSGroupChangePolicy holds policies that will be used for applying fsGroup to a volume.
Expand Down
14 changes: 14 additions & 0 deletions src/runtime/virtcontainers/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,20 @@ func (c *Container) createBlockDevices(ctx context.Context) error {
c.mounts[i].FSGroupChangePolicy = volume.FSGroupChangePolicy(value)
case volume.SensitiveMountOptions:
c.mounts[i].Options = append(c.mounts[i].Options, value)
case volume.ConfidentialMetadataKey:
confidential, err := strconv.ParseBool(value)
if err != nil {
c.Logger().Errorf("invalid value %q for metadata key %q, expected boolean string", value, key)
continue
}
c.mounts[i].Confidential = confidential
case volume.EphemeralMetadataKey:
ephemeral, err := strconv.ParseBool(value)
if err != nil {
c.Logger().Errorf("invalid value %q for metadata key %q, expected boolean string", value, key)
continue
}
c.mounts[i].Ephemeral = ephemeral
default:
c.Logger().Warnf("Ignoring unsupported direct-assignd volume metadata key: %s, value: %s", key, value)
}
Expand Down
7 changes: 7 additions & 0 deletions src/runtime/virtcontainers/kata_agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -1617,6 +1617,13 @@ func (k *kataAgent) handleDeviceBlockVolume(c *Container, m Mount, device api.De
}
}

if m.Confidential {
vol.DriverOptions = append(vol.DriverOptions, fmt.Sprintf("%s=true", volume.ConfidentialMetadataKey))
}
if m.Ephemeral {
vol.DriverOptions = append(vol.DriverOptions, fmt.Sprintf("%s=true", volume.EphemeralMetadataKey))
}

return vol, nil
}

Expand Down
7 changes: 7 additions & 0 deletions src/runtime/virtcontainers/mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,13 @@ type Mount struct {
// FSGroupChangePolicy specifies the policy that will be used when applying
// group id ownership change for a volume.
FSGroupChangePolicy volume.FSGroupChangePolicy

// Confidential specifies whether the underlying storage is encrypted.
Confidential bool

// Ephemeral specifies whether the underlying storage is ephemeral:
// https://kubernetes.io/docs/concepts/storage/ephemeral-volumes/
Ephemeral bool
}

func isSymlink(path string) bool {
Expand Down
6 changes: 4 additions & 2 deletions src/tools/genpolicy/genpolicy-settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -277,13 +277,15 @@
"CAP_CHECKPOINT_RESTORE"
],
"virtio_blk_storage_classes": [
"cc-local-csi",
"cc-managed-csi",
"cc-managed-premium-csi"
],
"smb_storage_classes": [
"cc-azurefile-csi",
"cc-azurefile-premium-csi"
],
"coco_ephemeral_storage_classes": [
"cc-local-csi"
]
},
"kata_config": {
Expand Down Expand Up @@ -322,4 +324,4 @@
"UpdateEphemeralMountsRequest": false,
"WriteStreamRequest": false
}
}
}
Loading
Loading