diff --git a/.github/workflows/hydrun.yaml b/.github/workflows/hydrun.yaml new file mode 100644 index 00000000000..2afecea26af --- /dev/null +++ b/.github/workflows/hydrun.yaml @@ -0,0 +1,97 @@ +name: hydrun CI + +on: + push: + pull_request: + schedule: + - cron: "0 0 * * 0" + +jobs: + build-linux: + runs-on: ${{ matrix.target.runner }} + permissions: + contents: read + strategy: + matrix: + target: + # Binaries + - id: rust.x86_64 + src: . + os: alpine:edge + flags: "" + cmd: ./Hydrunfile rust x86_64 + dst: out/* + runner: depot-ubuntu-22.04-32 + - id: rust.aarch64 + src: . + os: alpine:edge + flags: "" + cmd: ./Hydrunfile rust aarch64 + dst: out/* + runner: depot-ubuntu-22.04-arm-32 + + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Restore ccache + uses: actions/cache/restore@v4 + with: + path: | + /tmp/ccache + key: cache-ccache-${{ matrix.target.id }} + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Set up hydrun + run: | + curl -L -o /tmp/hydrun "https://github.com/pojntfx/hydrun/releases/latest/download/hydrun.linux-$(uname -m)" + sudo install /tmp/hydrun /usr/local/bin + - name: Build with hydrun + working-directory: ${{ matrix.target.src }} + run: hydrun -o ${{ matrix.target.os }} ${{ matrix.target.flags }} "${{ matrix.target.cmd }}" + - name: Fix permissions for output + run: sudo chown -R $USER . 
+ - name: Save ccache + uses: actions/cache/save@v4 + with: + path: | + /tmp/ccache + key: cache-ccache-${{ matrix.target.id }} + - name: Upload output + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.target.id }} + path: ${{ matrix.target.dst }} + + publish-linux: + runs-on: ubuntu-latest + permissions: + contents: write + needs: build-linux + + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Download output + uses: actions/download-artifact@v4 + with: + path: /tmp/out + - name: Extract branch name + id: extract_branch + run: echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT" + - name: Publish pre-release to GitHub releases + if: ${{ github.ref == 'refs/heads/main-live-migration-pvm' || github.ref == 'refs/heads/main-live-migration' || github.ref == 'refs/heads/firecracker-v1.8-live-migration-pvm' || github.ref == 'refs/heads/firecracker-v1.8-live-migration' }} + uses: softprops/action-gh-release@v2 + with: + tag_name: release-${{ steps.extract_branch.outputs.branch }} + prerelease: true + files: | + /tmp/out/*/* + - name: Publish release to GitHub releases + if: startsWith(github.ref, 'refs/tags/v') + uses: softprops/action-gh-release@v2 + with: + prerelease: false + files: | + /tmp/out/*/* diff --git a/Hydrunfile b/Hydrunfile new file mode 100755 index 00000000000..77e4832c543 --- /dev/null +++ b/Hydrunfile @@ -0,0 +1,29 @@ +#!/bin/sh + +set -e + +# Rust +if [ "$1" = "rust" ]; then + # Install native dependencies + apk add rust cargo clang-dev cmake linux-headers make git + + # Configure Git + git config --global --add safe.directory '*' + + # Build + cp "resources/seccomp/$2-unknown-linux-musl.json" "resources/seccomp/$2-alpine-linux-musl.json" + export RUSTFLAGS='-C target-feature=+crt-static' + cargo build --package firecracker --package jailer --package seccompiler --package rebase-snap --package cpu-template-helper --target "$2-alpine-linux-musl" --all-features --release + + # Stage binaries + mkdir -p out + + 
dir="./build/cargo_target/$2-alpine-linux-musl/release" + for file in $(ls "$dir"); do + if [ -x "$dir/$file" ] && [ ! -d "$dir/$file" ]; then + cp "$dir/$file" "./out/${file}.linux-$2" + fi + done + + exit 0 +fi diff --git a/docs/cpu_templates/cpu-template-helper.md b/docs/cpu_templates/cpu-template-helper.md index dc7531f8544..a3c966fe45e 100644 --- a/docs/cpu_templates/cpu-template-helper.md +++ b/docs/cpu_templates/cpu-template-helper.md @@ -235,6 +235,7 @@ CPU features to a heterogeneous fleet consisting of multiple CPU models. | HV_X64_MSR_SYNDBG_PENDING_BUFFER | 0x400000f5 | | HV_X64_MSR_SYNDBG_OPTIONS | 0x400000ff | | HV_X64_MSR_TSC_INVARIANT_CONTROL | 0x40000118 | +| HV_X64_MSR_TSC_INVARIANT_CONTROL | 0x40000118 | ### ARM registers excluded from guest CPU configuration dump diff --git a/resources/seccomp/aarch64-unknown-linux-musl.json b/resources/seccomp/aarch64-unknown-linux-musl.json index dee7e2c1d39..c0dfde3d016 100644 --- a/resources/seccomp/aarch64-unknown-linux-musl.json +++ b/resources/seccomp/aarch64-unknown-linux-musl.json @@ -35,6 +35,10 @@ { "syscall": "fsync" }, + { + "syscall": "msync", + "comment": "Used for live migration to sync dirty pages" + }, { "syscall": "close" }, diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json index a735997383f..1e4fc14726c 100644 --- a/resources/seccomp/x86_64-unknown-linux-musl.json +++ b/resources/seccomp/x86_64-unknown-linux-musl.json @@ -35,6 +35,10 @@ { "syscall": "fsync" }, + { + "syscall": "msync", + "comment": "Used for live migration to sync dirty pages" + }, { "syscall": "close" }, diff --git a/src/firecracker/src/api_server/mod.rs b/src/firecracker/src/api_server/mod.rs index 80f5510abe1..2c54c50f0d9 100644 --- a/src/firecracker/src/api_server/mod.rs +++ b/src/firecracker/src/api_server/mod.rs @@ -158,6 +158,14 @@ impl ApiServer { &METRICS.latencies_us.diff_create_snapshot, "create diff snapshot", )), + SnapshotType::Msync => Some(( 
&METRICS.latencies_us.diff_create_snapshot, + "memory synchronization snapshot", + )), + SnapshotType::MsyncAndState => Some(( + &METRICS.latencies_us.diff_create_snapshot, + "memory synchronization and state snapshot", + )), }, VmmAction::LoadSnapshot(_) => { Some((&METRICS.latencies_us.load_snapshot, "load snapshot")) diff --git a/src/firecracker/src/api_server/request/snapshot.rs b/src/firecracker/src/api_server/request/snapshot.rs index 8878c224b5c..bce4b7abc70 100644 --- a/src/firecracker/src/api_server/request/snapshot.rs +++ b/src/firecracker/src/api_server/request/snapshot.rs @@ -105,6 +105,7 @@ fn parse_put_snapshot_load(body: &Body) -> Result { mem_backend, enable_diff_snapshots: snapshot_config.enable_diff_snapshots, resume_vm: snapshot_config.resume_vm, + shared: snapshot_config.shared, }; // Construct the `ParsedRequest` object. @@ -181,6 +182,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: false, + shared: false, }; let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap(); assert!(parsed_request @@ -208,6 +210,7 @@ mod tests { }, enable_diff_snapshots: true, resume_vm: false, + shared: false, }; let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap(); assert!(parsed_request @@ -235,6 +238,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: true, + shared: false, }; let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap(); assert!(parsed_request @@ -259,6 +263,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: true, + shared: false, }; let parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap(); assert_eq!( diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml index 2db3b702588..59ddb98e79d 100644 --- a/src/firecracker/swagger/firecracker.yaml +++ b/src/firecracker/swagger/firecracker.yaml @@ -1199,6 +1199,8 @@ definitions: enum: - Full - Diff + - Msync + - MsyncAndState 
description: Type of snapshot to create. It is optional and by default, a full snapshot is created. @@ -1234,6 +1236,11 @@ definitions: type: boolean description: When set to true, the vm is also resumed if the snapshot load is successful. + shared: + type: boolean + description: When set to true and the guest memory backend is a file, + changes to the memory are asynchronously written back to the + backend as the VM is running. TokenBucket: type: object diff --git a/src/vmm/src/arch/x86_64/msr.rs b/src/vmm/src/arch/x86_64/msr.rs index b49b1b0348b..714d04ce872 100644 --- a/src/vmm/src/arch/x86_64/msr.rs +++ b/src/vmm/src/arch/x86_64/msr.rs @@ -49,7 +49,7 @@ const APIC_BASE_MSR: u32 = 0x800; /// Number of APIC MSR indexes const APIC_MSR_INDEXES: u32 = 0x400; -/// Custom MSRs fall in the range 0x4b564d00-0x4b564dff +/// Custom KVM MSRs fall in the range 0x4b564d00-0x4b564def (0x4b564df0-0x4b564dff is reserved for PVM) const MSR_KVM_WALL_CLOCK_NEW: u32 = 0x4b56_4d00; const MSR_KVM_SYSTEM_TIME_NEW: u32 = 0x4b56_4d01; const MSR_KVM_ASYNC_PF_EN: u32 = 0x4b56_4d02; @@ -58,6 +58,16 @@ const MSR_KVM_PV_EOI_EN: u32 = 0x4b56_4d04; const MSR_KVM_POLL_CONTROL: u32 = 0x4b56_4d05; const MSR_KVM_ASYNC_PF_INT: u32 = 0x4b56_4d06; +// Custom PVM MSRs fall in the range 0x4b564df0-0x4b564dff +const MSR_PVM_LINEAR_ADDRESS_RANGE: u32 = 0x4b56_4df0; +const MSR_PVM_VCPU_STRUCT: u32 = 0x4b56_4df1; +const MSR_PVM_SUPERVISOR_RSP: u32 = 0x4b56_4df2; +const MSR_PVM_SUPERVISOR_REDZONE: u32 = 0x4b56_4df3; +const MSR_PVM_EVENT_ENTRY: u32 = 0x4b56_4df4; +const MSR_PVM_RETU_RIP: u32 = 0x4b56_4df5; +const MSR_PVM_RETS_RIP: u32 = 0x4b56_4df6; +const MSR_PVM_SWITCH_CR3: u32 = 0x4b56_4df7; + /// Taken from arch/x86/include/asm/msr-index.h /// Spectre mitigations control MSR pub const MSR_IA32_SPEC_CTRL: u32 = 0x0000_0048; @@ -237,6 +247,14 @@ static SERIALIZABLE_MSR_RANGES: &[MsrRange] = &[ MSR_RANGE!(MSR_KVM_POLL_CONTROL), MSR_RANGE!(MSR_KVM_ASYNC_PF_INT), MSR_RANGE!(MSR_IA32_TSX_CTRL), + 
MSR_RANGE!(MSR_PVM_LINEAR_ADDRESS_RANGE), + MSR_RANGE!(MSR_PVM_VCPU_STRUCT), + MSR_RANGE!(MSR_PVM_SUPERVISOR_RSP), + MSR_RANGE!(MSR_PVM_SUPERVISOR_REDZONE), + MSR_RANGE!(MSR_PVM_EVENT_ENTRY), + MSR_RANGE!(MSR_PVM_RETU_RIP), + MSR_RANGE!(MSR_PVM_RETS_RIP), + MSR_RANGE!(MSR_PVM_SWITCH_CR3), ]; /// Specifies whether a particular MSR should be included in vcpu serialization. diff --git a/src/vmm/src/logger/metrics.rs b/src/vmm/src/logger/metrics.rs index 26a755c8f4e..bc52dbeac18 100644 --- a/src/vmm/src/logger/metrics.rs +++ b/src/vmm/src/logger/metrics.rs @@ -604,6 +604,10 @@ pub struct PerformanceMetrics { pub full_create_snapshot: SharedStoreMetric, /// Measures the snapshot diff create time, at the API (user) level, in microseconds. pub diff_create_snapshot: SharedStoreMetric, + /// Measures the snapshot memory synchronization time, at the VMM level, in microseconds. + pub msync_create_snapshot: SharedStoreMetric, + /// Measures the snapshot memory synchronization and state time, at the VMM level, in microseconds. + pub msync_and_state_create_snapshot: SharedStoreMetric, /// Measures the snapshot load time, at the API (user) level, in microseconds. pub load_snapshot: SharedStoreMetric, /// Measures the microVM pausing duration, at the API (user) level, in microseconds. 
@@ -627,6 +631,8 @@ impl PerformanceMetrics { Self { full_create_snapshot: SharedStoreMetric::new(), diff_create_snapshot: SharedStoreMetric::new(), + msync_create_snapshot: SharedStoreMetric::new(), + msync_and_state_create_snapshot: SharedStoreMetric::new(), load_snapshot: SharedStoreMetric::new(), pause_vm: SharedStoreMetric::new(), resume_vm: SharedStoreMetric::new(), diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 6c8058899f2..8c313d56661 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -147,6 +147,8 @@ pub enum CreateSnapshotError { UnsupportedVersion, /// Cannot write memory file: {0} Memory(MemoryError), + /// Cannot msync memory file: {0} + MemoryMsync(MemoryError), /// Cannot perform {0} on the memory backing file: {1} MemoryBackingFile(&'static str, io::Error), /// Cannot save the microVM state: {0} @@ -168,11 +170,16 @@ pub fn create_snapshot( vm_info: &VmInfo, params: &CreateSnapshotParams, ) -> Result<(), CreateSnapshotError> { - let microvm_state = vmm - .save_state(vm_info) - .map_err(CreateSnapshotError::MicrovmState)?; + match params.snapshot_type { + SnapshotType::Diff | SnapshotType::Full | SnapshotType::MsyncAndState => { + let microvm_state = vmm + .save_state(vm_info) + .map_err(CreateSnapshotError::MicrovmState)?; - snapshot_state_to_file(µvm_state, ¶ms.snapshot_path)?; + snapshot_state_to_file(µvm_state, ¶ms.snapshot_path)?; + } + SnapshotType::Msync => (), + } snapshot_memory_to_file(vmm, ¶ms.mem_file_path, params.snapshot_type)?; @@ -216,63 +223,64 @@ fn snapshot_memory_to_file( ) -> Result<(), CreateSnapshotError> { use self::CreateSnapshotError::*; - // Need to check this here, as we create the file in the line below - let file_existed = mem_file_path.exists(); - - let mut file = OpenOptions::new() - .write(true) - .create(true) - .open(mem_file_path) - .map_err(|err| MemoryBackingFile("open", err))?; - - // Determine what size our total memory area is. 
- let mem_size_mib = mem_size_mib(vmm.guest_memory()); - let expected_size = mem_size_mib * 1024 * 1024; - - if file_existed { - let file_size = file - .metadata() - .map_err(|e| MemoryBackingFile("get_metadata", e))? - .len(); - - // Here we only truncate the file if the size mismatches. - // - For full snapshots, the entire file's contents will be overwritten anyway. We have to - // avoid truncating here to deal with the edge case where it represents the snapshot file - // from which this very microVM was loaded (as modifying the memory file would be - // reflected in the mmap of the file, meaning a truncate operation would zero out guest - // memory, and thus corrupt the VM). - // - For diff snapshots, we want to merge the diff layer directly into the file. - if file_size != expected_size { - file.set_len(0) - .map_err(|err| MemoryBackingFile("truncate", err))?; - } - } - - // Set the length of the file to the full size of the memory area. - file.set_len(expected_size) - .map_err(|e| MemoryBackingFile("set_length", e))?; - match snapshot_type { - SnapshotType::Diff => { - let dirty_bitmap = vmm.get_dirty_bitmap().map_err(DirtyBitmap)?; - vmm.guest_memory() - .dump_dirty(&mut file, &dirty_bitmap) - .map_err(Memory) - } - SnapshotType::Full => { - let dump_res = vmm.guest_memory().dump(&mut file).map_err(Memory); - if dump_res.is_ok() { - vmm.reset_dirty_bitmap(); - vmm.guest_memory().reset_dirty(); + SnapshotType::Diff | SnapshotType::Full => { + // Need to check this here, as we create the file in the line below + let file_existed = mem_file_path.exists(); + + let mut file = OpenOptions::new() + .write(true) + .truncate(false) // No need to truncate; see file_existed check below + .create(true) + .open(mem_file_path) + .map_err(|err| MemoryBackingFile("open", err))?; + + // Determine what size our total memory area is. 
+ let mem_size_mib = mem_size_mib(vmm.guest_memory()); + let expected_size = mem_size_mib * 1024 * 1024; + + if file_existed { + let file_size = file + .metadata() + .map_err(|e| MemoryBackingFile("get_metadata", e))? + .len(); + + // Here we only truncate the file if the size mismatches. + // - For full snapshots, the entire file's contents will be overwritten anyway. We have to + // avoid truncating here to deal with the edge case where it represents the snapshot file + // from which this very microVM was loaded (as modifying the memory file would be + // reflected in the mmap of the file, meaning a truncate operation would zero out guest + // memory, and thus corrupt the VM). + // - For diff snapshots, we want to merge the diff layer directly into the file. + if file_size != expected_size { + file.set_len(0) + .map_err(|err| MemoryBackingFile("truncate", err))?; + } } - dump_res + // Set the length of the file to the full size of the memory area. + file.set_len(expected_size) + .map_err(|e| MemoryBackingFile("set_length", e))?; + + match snapshot_type { + SnapshotType::Diff => { + let dirty_bitmap = vmm.get_dirty_bitmap().map_err(DirtyBitmap)?; + vmm.guest_memory() + .dump_dirty(&mut file, &dirty_bitmap) + .map_err(Memory) + } + SnapshotType::Full => vmm.guest_memory().dump(&mut file).map_err(Memory), + _ => Ok(()), + }?; + file.flush() + .map_err(|err| MemoryBackingFile("flush", err))?; + file.sync_all() + .map_err(|err| MemoryBackingFile("sync_all", err)) } - }?; - file.flush() - .map_err(|err| MemoryBackingFile("flush", err))?; - file.sync_all() - .map_err(|err| MemoryBackingFile("sync_all", err)) + SnapshotType::Msync | SnapshotType::MsyncAndState => { + vmm.guest_memory().msync().map_err(MemoryMsync) + } + } } /// Validates that snapshot CPU vendor matches the host CPU vendor. 
@@ -434,6 +442,7 @@ pub fn restore_from_snapshot( mem_state, track_dirty_pages, vm_resources.vm_config.huge_pages, + params.shared, ) .map_err(RestoreFromSnapshotGuestMemoryError::File)?, None, @@ -467,7 +476,7 @@ pub enum SnapshotStateFromFileError { /// Failed to open snapshot file: {0} Open(std::io::Error), /// Failed to read snapshot file metadata: {0} - Meta(std::io::Error), + Meta(crate::snapshot::SnapshotError), /// Failed to load snapshot state from file: {0} Load(#[from] crate::snapshot::SnapshotError), } @@ -478,8 +487,9 @@ fn snapshot_state_from_file( let snapshot = Snapshot::new(SNAPSHOT_VERSION); let mut snapshot_reader = File::open(snapshot_path).map_err(SnapshotStateFromFileError::Open)?; - let metadata = std::fs::metadata(snapshot_path).map_err(SnapshotStateFromFileError::Meta)?; - let snapshot_len = u64_to_usize(metadata.len()); + let raw_snapshot_len: u64 = + Snapshot::deserialize(&mut snapshot_reader).map_err(SnapshotStateFromFileError::Meta)?; + let snapshot_len = u64_to_usize(raw_snapshot_len); let state: MicrovmState = snapshot .load_with_version_check(&mut snapshot_reader, snapshot_len) .map_err(SnapshotStateFromFileError::Load)?; @@ -500,10 +510,24 @@ fn guest_memory_from_file( mem_state: &GuestMemoryState, track_dirty_pages: bool, huge_pages: HugePageConfig, + shared: bool, ) -> Result { - let mem_file = File::open(mem_file_path)?; - let guest_mem = - GuestMemoryMmap::from_state(Some(&mem_file), mem_state, track_dirty_pages, huge_pages)?; + let mem_file = if shared { + OpenOptions::new() + .read(true) + .write(true) + .open(mem_file_path)? + } else { + File::open(mem_file_path)? 
+ }; + + let guest_mem = GuestMemoryMmap::from_state( + Some(&mem_file), + mem_state, + track_dirty_pages, + huge_pages, + shared, + )?; Ok(guest_mem) } @@ -562,7 +586,8 @@ fn create_guest_memory( track_dirty_pages: bool, huge_pages: HugePageConfig, ) -> Result<(GuestMemoryMmap, Vec), GuestMemoryFromUffdError> { - let guest_memory = GuestMemoryMmap::from_state(None, mem_state, track_dirty_pages, huge_pages)?; + let guest_memory = + GuestMemoryMmap::from_state(None, mem_state, track_dirty_pages, huge_pages, false)?; let mut backend_mappings = Vec::with_capacity(guest_memory.num_regions()); for (mem_region, state_region) in guest_memory.iter().zip(mem_state.regions.iter()) { backend_mappings.push(GuestRegionUffdMapping { diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index adb526de91c..11501904e06 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -795,6 +795,26 @@ impl RuntimeApiController { elapsed_time_us ); } + SnapshotType::Msync => { + let elapsed_time_us = update_metric_with_elapsed_time( + &METRICS.latencies_us.msync_create_snapshot, + create_start_us, + ); + info!( + "'create memory synchronization snapshot' VMM action took {} us.", + elapsed_time_us + ); + } + SnapshotType::MsyncAndState => { + let elapsed_time_us = update_metric_with_elapsed_time( + &METRICS.latencies_us.msync_and_state_create_snapshot, + create_start_us, + ); + info!( + "'create memory synchronization and state snapshot' VMM action took {} us.", + elapsed_time_us + ); + } } Ok(VmmData::Empty) } @@ -1737,6 +1757,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: false, + shared: false, }); // Request should succeed. preboot.handle_preboot_request(req).unwrap(); @@ -1753,6 +1774,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: true, + shared: false, }); // Request should succeed. 
preboot.handle_preboot_request(req).unwrap(); @@ -2134,6 +2156,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: false, + shared: false, }), VmmActionError::OperationNotSupportedPostBoot, ); @@ -2160,6 +2183,7 @@ mod tests { }, enable_diff_snapshots: false, resume_vm: false, + shared: false, }); let err = preboot.handle_preboot_request(req); assert_eq!( diff --git a/src/vmm/src/snapshot/mod.rs b/src/vmm/src/snapshot/mod.rs index 2a19a5d5298..6c3feeff37f 100644 --- a/src/vmm/src/snapshot/mod.rs +++ b/src/vmm/src/snapshot/mod.rs @@ -62,6 +62,10 @@ pub enum SnapshotError { Io(i32), /// An error occured with serialization/deserialization: {0} Serde(String), + /// Failed to flush snapshot CRC to snapshot buffer + Flush, + /// Failed to write snapshot buffer to snapshot file + Write, } /// Firecracker snapshot header @@ -204,12 +208,21 @@ impl Snapshot { T: Write + Debug, O: Serialize + Debug, { - let mut crc_writer = CRC64Writer::new(writer); + let mut snapshot_buf = Vec::new(); + + let mut crc_writer = CRC64Writer::new(&mut snapshot_buf); self.save_without_crc(&mut crc_writer, object)?; // Now write CRC value let checksum = crc_writer.checksum(); - Self::serialize(&mut crc_writer, &checksum) + Self::serialize(&mut crc_writer, &checksum)?; + + crc_writer.flush().map_err(|_| SnapshotError::Flush)?; + + let snapshot_len = snapshot_buf.len() as u64; + Self::serialize(writer, &snapshot_len)?; + + writer.write_all(&snapshot_buf).map_err(|_| SnapshotError::Write) } /// Save a snapshot with no CRC64 checksum included. diff --git a/src/vmm/src/vmm_config/snapshot.rs b/src/vmm/src/vmm_config/snapshot.rs index e1850b74939..43b03e563af 100644 --- a/src/vmm/src/vmm_config/snapshot.rs +++ b/src/vmm/src/vmm_config/snapshot.rs @@ -18,6 +18,10 @@ pub enum SnapshotType { /// Full snapshot. #[default] Full, + /// Memory synchronization snapshot. + Msync, + /// Memory synchronization and state snapshot. 
+ MsyncAndState, } /// Specifies the method through which guest memory will get populated when @@ -60,6 +64,10 @@ pub struct LoadSnapshotParams { /// When set to true, the vm is also resumed if the snapshot load /// is successful. pub resume_vm: bool, + /// When set to true and the guest memory backend is a file, + /// changes to the memory are asynchronously written back to the + /// backend as the VM is running. + pub shared: bool, } /// Stores the configuration for loading a snapshot that is provided by the user. @@ -82,6 +90,9 @@ pub struct LoadSnapshotConfig { /// Whether or not to resume the vm post snapshot load. #[serde(default)] pub resume_vm: bool, + /// Whether or not to asynchronously write back memory changes to the backing file. + #[serde(default)] + pub shared: bool, } /// Stores the configuration used for managing snapshot memory. diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 5f0390ac72d..7c348d4a030 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -87,8 +87,12 @@ where state: &GuestMemoryState, track_dirty_pages: bool, huge_pages: HugePageConfig, + shared: bool, ) -> Result; + /// Flushes memory contents to disk. + fn msync(&self) -> std::result::Result<(), MemoryError>; + /// Describes GuestMemoryMmap through a GuestMemoryState struct. fn describe(&self) -> GuestMemoryState; @@ -230,6 +234,7 @@ impl GuestMemoryExtension for GuestMemoryMmap { state: &GuestMemoryState, track_dirty_pages: bool, huge_pages: HugePageConfig, + shared: bool, ) -> Result { match file { Some(f) => { @@ -249,7 +254,7 @@ impl GuestMemoryExtension for GuestMemoryMmap { .collect::, std::io::Error>>() .map_err(MemoryError::FileError)?; - Self::from_raw_regions_file(regions, track_dirty_pages, false) + Self::from_raw_regions_file(regions, track_dirty_pages, shared) } None => { let regions = state @@ -262,6 +267,21 @@ impl GuestMemoryExtension for GuestMemoryMmap { } } + /// Flushes memory contents to disk. 
+ fn msync(&self) -> std::result::Result<(), MemoryError> { + self.iter().try_for_each(|region| { + // SAFETY: It is safe to call `msync()` on both an anonymous (where it is a nop) and shared (where it flushes to disk) region + let ret = unsafe { + libc::msync( + region.as_ptr().cast::<libc::c_void>(), + region.size(), + libc::MS_SYNC, + ) + }; + if ret < 0 { + return Err(MemoryError::FileError(std::io::Error::last_os_error())); + } + Ok(()) + }) + } + /// Describes GuestMemoryMmap through a GuestMemoryState struct. fn describe(&self) -> GuestMemoryState { let mut guest_memory_state = GuestMemoryState::default(); @@ -541,9 +561,14 @@ mod tests { let file = TempFile::new().unwrap().into_file(); // No mapping of snapshots that were taken with hugetlbfs enabled - let err = - GuestMemoryMmap::from_state(Some(&file), &state, false, HugePageConfig::Hugetlbfs2M) - .unwrap_err(); + let err = GuestMemoryMmap::from_state( + Some(&file), + &state, + false, + HugePageConfig::Hugetlbfs2M, + false, + ) + .unwrap_err(); assert!(matches!(err, MemoryError::HugetlbfsSnapshot), "{:?}", err); } @@ -732,6 +757,7 @@ mod tests { &memory_state, false, HugePageConfig::None, + false, ) .unwrap(); @@ -790,9 +816,14 @@ mod tests { guest_memory.dump_dirty(&mut file, &dirty_bitmap).unwrap(); // We can restore from this because this is the first dirty dump. - let restored_guest_memory = - GuestMemoryMmap::from_state(Some(&file), &memory_state, false, HugePageConfig::None) - .unwrap(); + let restored_guest_memory = GuestMemoryMmap::from_state( + Some(&file), + &memory_state, + false, + HugePageConfig::None, + false, + ) + .unwrap(); // Check that the region contents are the same. 
let mut restored_region = vec![0u8; region_size]; diff --git a/src/vmm/tests/integration_tests.rs b/src/vmm/tests/integration_tests.rs index 6067c252548..cd41de0c125 100644 --- a/src/vmm/tests/integration_tests.rs +++ b/src/vmm/tests/integration_tests.rs @@ -6,8 +6,12 @@ use std::thread; use std::time::Duration; use utils::tempfile::TempFile; +use utils::u64_to_usize; use vmm::builder::{build_and_boot_microvm, build_microvm_from_snapshot}; -use vmm::persist::{self, snapshot_state_sanity_check, MicrovmState, MicrovmStateError, VmInfo}; +use vmm::persist::{ + self, snapshot_state_sanity_check, MicrovmState, MicrovmStateError, SnapshotStateFromFileError, + VmInfo, +}; use vmm::resources::VmResources; use vmm::seccomp_filters::get_empty_filters; use vmm::snapshot::Snapshot; @@ -203,9 +207,10 @@ fn verify_create_snapshot(is_diff: bool) -> (TempFile, TempFile) { vmm.lock().unwrap().stop(FcExitCode::Ok); // Check that we can deserialize the microVM state from `snapshot_file`. - let snapshot_path = snapshot_file.as_path().to_path_buf(); - let snapshot_file_metadata = std::fs::metadata(snapshot_path).unwrap(); - let snapshot_len = snapshot_file_metadata.len() as usize; + let raw_snapshot_len: u64 = Snapshot::deserialize(&mut snapshot_file.as_file()) + .map_err(SnapshotStateFromFileError::Meta) + .unwrap(); + let snapshot_len = u64_to_usize(raw_snapshot_len); let (restored_microvm_state, _) = Snapshot::load::<_, MicrovmState>(&mut snapshot_file.as_file(), snapshot_len).unwrap(); @@ -238,6 +243,7 @@ fn verify_load_snapshot(snapshot_file: TempFile, memory_file: TempFile) { µvm_state.memory_state, false, HugePageConfig::None, + false, ) .unwrap(); @@ -298,11 +304,13 @@ fn test_snapshot_load_sanity_checks() { fn get_microvm_state_from_snapshot() -> MicrovmState { // Create a diff snapshot let (snapshot_file, _) = verify_create_snapshot(true); + snapshot_file.as_file().seek(SeekFrom::Start(0)).unwrap(); // Deserialize the microVM state. 
- let snapshot_file_metadata = snapshot_file.as_file().metadata().unwrap(); - let snapshot_len = snapshot_file_metadata.len() as usize; - snapshot_file.as_file().seek(SeekFrom::Start(0)).unwrap(); + let raw_snapshot_len: u64 = Snapshot::deserialize(&mut snapshot_file.as_file()) + .map_err(SnapshotStateFromFileError::Meta) + .unwrap(); + let snapshot_len = u64_to_usize(raw_snapshot_len); let (state, _) = Snapshot::load(&mut snapshot_file.as_file(), snapshot_len).unwrap(); state }