diff --git a/benches/vm_execution.rs b/benches/vm_execution.rs index aecac29f..d39b47c4 100644 --- a/benches/vm_execution.rs +++ b/benches/vm_execution.rs @@ -251,17 +251,16 @@ fn bench_jit_vs_interpreter_call_depth_dynamic(bencher: &mut Bencher) { jlt r6, 1024, -4 exit function_foo: - add r11, -4 - stw [r10-4], 0x11223344 + add r10, -64 + stw [r10+4], 0x11223344 mov r6, r1 jeq r6, 0, +3 mov r1, r6 add r1, -1 call function_foo - add r11, 4 exit", Config::default(), - 176130, + 156674, &mut [], ); } diff --git a/doc/bytecode.md b/doc/bytecode.md index 1fd8c679..c2aa83a7 100644 --- a/doc/bytecode.md +++ b/doc/bytecode.md @@ -19,7 +19,6 @@ All of them are 64 bit wide. | `r8` | all | GPR | Call-preserved | `r9` | all | GPR | Call-preserved | `r10` | all | Frame pointer | System register -| `r11` | from v1 | Stack pointer | System register | `pc` | all | Program counter | Hidden register @@ -258,7 +257,6 @@ Except that the target location of `callx` is the src register, thus runtime dyn Call instructions (`call` and `callx` but not `syscall`) do: - Save the registers `r6`, `r7`, `r8`, `r9`, the frame pointer `r10` and the `pc` (pointing at the next instruction) - If < v1: Add one stack frame size to the frame pointer `r10` -- If ≥ v1: Move the stack pointer `r11` into the frame pointer `r10` The `exit` (a.k.a. 
return) instruction does: - Restore the registers `r6`, `r7`, `r8`, `r9`, the frame pointer `r10` and the `pc` @@ -324,13 +322,12 @@ Verification - For all instructions the source register must be `r0` ≤ src ≤ `r10` - For all instructions (except for memory writes) the destination register must be `r0` ≤ dst ≤ `r9` - For all instructions the opcode must be valid -- Memory write instructions can use `r10` as destination register ### until v1 -- No instruction can use `r11` as destination register +- Only memory write instructions can use `r10` as destination register ### from v1 -- `add64 reg, imm` can use `r11` as destination register +- `add64 reg, imm` can also use `r10` as destination register ### until v2 - Opcodes from the product / quotient / remainder instruction class are forbiden diff --git a/src/ebpf.rs index 2558b9e7..35bdcb36 100644 --- a/src/ebpf.rs +++ b/src/ebpf.rs @@ -30,8 +30,6 @@ pub const PROG_MAX_INSNS: usize = 65_536; pub const INSN_SIZE: usize = 8; /// Frame pointer register pub const FRAME_PTR_REG: usize = 10; -/// Stack pointer register -pub const STACK_PTR_REG: usize = 11; /// First scratch register pub const FIRST_SCRATCH_REG: usize = 6; /// Number of scratch registers diff --git a/src/interpreter.rs b/src/interpreter.rs index 1fd1dd61..573a7cb7 100644 --- a/src/interpreter.rs +++ b/src/interpreter.rs @@ -13,7 +13,7 @@ //! Interpreter for eBPF programs. 
use crate::{ - ebpf::{self, STACK_PTR_REG}, + ebpf, elf::Executable, error::{EbpfError, ProgramResult}, program::BuiltinFunction, @@ -146,9 +146,8 @@ impl<'a, 'b, C: ContextObject> Interpreter<'a, 'b, C> { // With fixed frames we start the new frame at the next fixed offset let stack_frame_size = config.stack_frame_size * if config.enable_stack_frame_gaps { 2 } else { 1 }; - self.vm.stack_pointer += stack_frame_size as u64; + self.reg[ebpf::FRAME_PTR_REG] += stack_frame_size as u64; } - self.reg[ebpf::FRAME_PTR_REG] = self.vm.stack_pointer; true } @@ -189,16 +188,6 @@ impl<'a, 'b, C: ContextObject> Interpreter<'a, 'b, C> { } match insn.opc { - ebpf::ADD64_IMM if dst == STACK_PTR_REG && self.executable.get_sbpf_version().dynamic_stack_frames() => { - // Let the stack overflow. For legitimate programs, this is a nearly - // impossible condition to hit since programs are metered and we already - // enforce a maximum call depth. For programs that intentionally mess - // around with the stack pointer, MemoryRegion::map will return - // InvalidVirtualAddress(stack_ptr) once an invalid stack address is - // accessed. 
- self.vm.stack_pointer = self.vm.stack_pointer.overflowing_add(insn.imm as u64).0; - } - ebpf::LD_DW_IMM if !self.executable.get_sbpf_version().disable_lddw() => { ebpf::augment_lddw_unchecked(self.program, &mut insn); self.reg[dst] = insn.imm as u64; @@ -584,11 +573,6 @@ impl<'a, 'b, C: ContextObject> Interpreter<'a, 'b, C> { self.reg[ebpf::FIRST_SCRATCH_REG ..ebpf::FIRST_SCRATCH_REG + ebpf::SCRATCH_REGS] .copy_from_slice(&frame.caller_saved_registers); - if !self.executable.get_sbpf_version().dynamic_stack_frames() { - let stack_frame_size = - config.stack_frame_size * if config.enable_stack_frame_gaps { 2 } else { 1 }; - self.vm.stack_pointer -= stack_frame_size as u64; - } check_pc!(self, next_pc, frame.target_pc); } _ => throw_error!(self, EbpfError::UnsupportedInstruction), diff --git a/src/jit.rs b/src/jit.rs index c9eff7d5..dc908df3 100644 --- a/src/jit.rs +++ b/src/jit.rs @@ -24,7 +24,7 @@ use rand::{ use std::{fmt::Debug, mem, ptr}; use crate::{ - ebpf::{self, FIRST_SCRATCH_REG, FRAME_PTR_REG, INSN_SIZE, SCRATCH_REGS, STACK_PTR_REG}, + ebpf::{self, FIRST_SCRATCH_REG, FRAME_PTR_REG, INSN_SIZE, SCRATCH_REGS}, elf::Executable, error::{EbpfError, ProgramResult}, memory_management::{ @@ -255,15 +255,14 @@ struct Jump { enum RuntimeEnvironmentSlot { HostStackPointer = 0, CallDepth = 1, - StackPointer = 2, - ContextObjectPointer = 3, - PreviousInstructionMeter = 4, - DueInsnCount = 5, - StopwatchNumerator = 6, - StopwatchDenominator = 7, - Registers = 8, - ProgramResult = 20, - MemoryMapping = 28, + ContextObjectPointer = 2, + PreviousInstructionMeter = 3, + DueInsnCount = 4, + StopwatchNumerator = 5, + StopwatchDenominator = 6, + Registers = 7, + ProgramResult = 19, + MemoryMapping = 27, } /* Explanation of the Instruction Meter @@ -418,16 +417,11 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> { self.emit_ins(X86Instruction::load_immediate(OperandSize::S64, REGISTER_SCRATCH, 0)); } - let dst = if insn.dst == STACK_PTR_REG as u8 { u8::MAX } else { 
REGISTER_MAP[insn.dst as usize] }; + let dst = if insn.dst == FRAME_PTR_REG as u8 { u8::MAX } else { REGISTER_MAP[insn.dst as usize] }; let src = REGISTER_MAP[insn.src as usize]; let target_pc = (self.pc as isize + insn.off as isize + 1) as usize; match insn.opc { - ebpf::ADD64_IMM if insn.dst == STACK_PTR_REG as u8 && self.executable.get_sbpf_version().dynamic_stack_frames() => { - let stack_ptr_access = X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::StackPointer)); - self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 0, REGISTER_PTR_TO_VM, insn.imm, Some(stack_ptr_access))); - } - ebpf::LD_DW_IMM if !self.executable.get_sbpf_version().disable_lddw() => { self.emit_validate_and_profile_instruction_count(false, Some(self.pc + 2)); self.pc += 1; @@ -754,25 +748,16 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> { self.emit_validate_instruction_count(Some(self.pc)); let call_depth_access = X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::CallDepth)); - self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_PTR_TO_VM, REGISTER_MAP[FRAME_PTR_REG], call_depth_access)); - - // If CallDepth == 0, we've reached the exit instruction of the entry point - self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S32, REGISTER_MAP[FRAME_PTR_REG], 0, None)); + // If env.call_depth == 0, we've reached the exit instruction of the entry point + self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S32, REGISTER_PTR_TO_VM, 0, Some(call_depth_access))); if self.config.enable_instruction_meter { self.emit_ins(X86Instruction::load_immediate(OperandSize::S64, REGISTER_SCRATCH, self.pc as i64)); } // we're done self.emit_ins(X86Instruction::conditional_jump_immediate(0x84, self.relative_to_anchor(ANCHOR_EXIT, 6))); - // else decrement and update CallDepth - self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 5, REGISTER_MAP[FRAME_PTR_REG], 1, None)); - self.emit_ins(X86Instruction::store(OperandSize::S64, 
REGISTER_MAP[FRAME_PTR_REG], REGISTER_PTR_TO_VM, call_depth_access)); - - if !self.executable.get_sbpf_version().dynamic_stack_frames() { - let stack_pointer_access = X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::StackPointer)); - let stack_frame_size = self.config.stack_frame_size as i64 * if self.config.enable_stack_frame_gaps { 2 } else { 1 }; - self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 5, REGISTER_PTR_TO_VM, stack_frame_size, Some(stack_pointer_access))); // env.stack_pointer -= stack_frame_size; - } + // else decrement and update env.call_depth + self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 5, REGISTER_PTR_TO_VM, 1, Some(call_depth_access))); // env.call_depth -= 1; // and return self.emit_profile_instruction_count(false, Some(0)); @@ -1540,23 +1525,18 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> { // Push the caller's frame pointer. The code to restore it is emitted at the end of emit_internal_call(). self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_MAP[FRAME_PTR_REG], RSP, X86IndirectAccess::OffsetIndexShift(8, RSP, 0))); self.emit_ins(X86Instruction::xchg(OperandSize::S64, REGISTER_SCRATCH, RSP, Some(X86IndirectAccess::OffsetIndexShift(0, RSP, 0)))); // Push return address and restore original REGISTER_SCRATCH - - // Increase CallDepth + // Increase env.call_depth let call_depth_access = X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::CallDepth)); - self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 0, REGISTER_PTR_TO_VM, 1, Some(call_depth_access))); - self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_PTR_TO_VM, REGISTER_MAP[FRAME_PTR_REG], call_depth_access)); - // If CallDepth == self.config.max_call_depth, stop and return CallDepthExceeded - self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S32, REGISTER_MAP[FRAME_PTR_REG], self.config.max_call_depth as i64, None)); + self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 0, 
REGISTER_PTR_TO_VM, 1, Some(call_depth_access))); // env.call_depth += 1; + // If env.call_depth == self.config.max_call_depth, throw CallDepthExceeded + self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S32, REGISTER_PTR_TO_VM, self.config.max_call_depth as i64, Some(call_depth_access))); self.emit_ins(X86Instruction::conditional_jump_immediate(0x83, self.relative_to_anchor(ANCHOR_CALL_DEPTH_EXCEEDED, 6))); - // Setup the frame pointer for the new frame. What we do depends on whether we're using dynamic or fixed frames. - let stack_pointer_access = X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::StackPointer)); if !self.executable.get_sbpf_version().dynamic_stack_frames() { // With fixed frames we start the new frame at the next fixed offset let stack_frame_size = self.config.stack_frame_size as i64 * if self.config.enable_stack_frame_gaps { 2 } else { 1 }; - self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 0, REGISTER_PTR_TO_VM, stack_frame_size, Some(stack_pointer_access))); // env.stack_pointer += stack_frame_size; + self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 0, REGISTER_MAP[FRAME_PTR_REG], stack_frame_size, None)); // REGISTER_MAP[FRAME_PTR_REG] += stack_frame_size; } - self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_PTR_TO_VM, REGISTER_MAP[FRAME_PTR_REG], stack_pointer_access)); // reg[ebpf::FRAME_PTR_REG] = env.stack_pointer; self.emit_ins(X86Instruction::return_near()); // Routine for emit_internal_call(Value::Register()) @@ -1757,7 +1737,6 @@ mod tests { check_slot!(env, host_stack_pointer, HostStackPointer); check_slot!(env, call_depth, CallDepth); - check_slot!(env, stack_pointer, StackPointer); check_slot!(env, context_object_pointer, ContextObjectPointer); check_slot!(env, previous_instruction_meter, PreviousInstructionMeter); check_slot!(env, due_insn_count, DueInsnCount); diff --git a/src/verifier.rs b/src/verifier.rs index e676ef91..ad8f59d1 100644 --- a/src/verifier.rs +++ 
b/src/verifier.rs @@ -76,6 +76,9 @@ pub enum VerifierError { /// Invalid syscall #[error("Invalid syscall code {0}")] InvalidSyscall(u32), + /// Unaligned immediate + #[error("Unaligned immediate (insn #{0})")] + UnalignedImmediate(usize), } /// eBPF Verifier @@ -121,6 +124,18 @@ fn check_imm_endian(insn: &ebpf::Insn, insn_ptr: usize) -> Result<(), VerifierEr } } +fn check_imm_aligned( + insn: &ebpf::Insn, + insn_ptr: usize, + alignment: i64, +) -> Result<(), VerifierError> { + if (insn.imm & (alignment - 1)) == 0 { + Ok(()) + } else { + Err(VerifierError::UnalignedImmediate(insn_ptr)) + } +} + fn check_load_dw(prog: &[u8], insn_ptr: usize) -> Result<(), VerifierError> { if (insn_ptr + 1) * ebpf::INSN_SIZE >= prog.len() { // Last instruction cannot be LD_DW because there would be no 2nd DW @@ -184,7 +199,7 @@ fn check_registers( match (insn.dst, store) { (0..=9, _) | (10, true) => Ok(()), - (11, _) if sbpf_version.dynamic_stack_frames() && insn.opc == ebpf::ADD64_IMM => Ok(()), + (10, false) if sbpf_version.dynamic_stack_frames() && insn.opc == ebpf::ADD64_IMM => Ok(()), (10, false) => Err(VerifierError::CannotWriteR10(insn_ptr)), (_, _) => Err(VerifierError::InvalidDestinationRegister(insn_ptr)), } @@ -305,6 +320,9 @@ impl Verifier for RequisiteVerifier { ebpf::BE => { check_imm_endian(&insn, insn_ptr)?; }, // BPF_ALU64_STORE class + ebpf::ADD64_IMM if insn.dst == ebpf::FRAME_PTR_REG as u8 && sbpf_version.dynamic_stack_frames() => { + check_imm_aligned(&insn, insn_ptr, 64)?; + }, ebpf::ADD64_IMM => {}, ebpf::ADD64_REG => {}, ebpf::SUB64_IMM => {}, diff --git a/src/vm.rs b/src/vm.rs index 18ad7d5d..3afa01b6 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -287,13 +287,6 @@ pub struct EbpfVm<'a, C: ContextObject> { /// Incremented on calls and decremented on exits. It's used to enforce /// config.max_call_depth and to know when to terminate execution. pub call_depth: u64, - /// Guest stack pointer (r11). - /// - /// The stack pointer isn't exposed as an actual register. 
Only sub and add - /// instructions (typically generated by the LLVM backend) are allowed to - /// access it when sbpf_version.dynamic_stack_frames()=true. Its value is only - /// stored here and therefore the register is not tracked in REGISTER_MAP. - pub stack_pointer: u64, /// Pointer to ContextObject pub context_object_pointer: &'a mut C, /// Last return value of instruction_meter.get_remaining() @@ -329,7 +322,8 @@ impl<'a, C: ContextObject> EbpfVm<'a, C> { stack_len: usize, ) -> Self { let config = loader.get_config(); - let stack_pointer = + let mut registers = [0u64; 12]; + registers[ebpf::FRAME_PTR_REG] = ebpf::MM_STACK_START.saturating_add(if sbpf_version.dynamic_stack_frames() { // the stack is fully descending, frames start as empty and change size anytime r11 is modified stack_len @@ -343,13 +337,12 @@ impl<'a, C: ContextObject> EbpfVm<'a, C> { EbpfVm { host_stack_pointer: std::ptr::null_mut(), call_depth: 0, - stack_pointer, context_object_pointer: context_object, previous_instruction_meter: 0, due_insn_count: 0, stopwatch_numerator: 0, stopwatch_denominator: 0, - registers: [0u64; 12], + registers, program_result: ProgramResult::Ok(0), memory_mapping, call_frames: vec![CallFrame::default(); config.max_call_depth], @@ -368,9 +361,7 @@ impl<'a, C: ContextObject> EbpfVm<'a, C> { interpreted: bool, ) -> (u64, ProgramResult) { debug_assert!(Arc::ptr_eq(&self.loader, executable.get_loader())); - // R1 points to beginning of input memory, R10 to the stack of the first frame, R11 is the pc (hidden) self.registers[1] = ebpf::MM_INPUT_START; - self.registers[ebpf::FRAME_PTR_REG] = self.stack_pointer; self.registers[11] = executable.get_entrypoint_instruction_offset() as u64; let config = executable.get_config(); let initial_insn_count = if config.enable_instruction_meter { diff --git a/tests/execution.rs b/tests/execution.rs index b7614ca4..9573c918 100644 --- a/tests/execution.rs +++ b/tests/execution.rs @@ -2087,14 +2087,13 @@ fn 
test_err_dynamic_stack_ptr_overflow() { // See the comment in CallFrames::resize_stack() for the reason why it's // safe to let the stack pointer overflow - // stack_ptr -= stack_ptr + 1 test_interpreter_and_jit_asm!( " - add r11, -0x7FFFFFFF - add r11, -0x7FFFFFFF - add r11, -0x7FFFFFFF - add r11, -0x7FFFFFFF - add r11, -0x40005 + add r10, -0x7FFFFF00 + add r10, -0x7FFFFF00 + add r10, -0x7FFFFF00 + add r10, -0x7FFFFF00 + add r10, -0x40440 call function_foo exit function_foo: @@ -2104,7 +2103,7 @@ fn test_err_dynamic_stack_ptr_overflow() { TestContextObject::new(7), ProgramResult::Err(EbpfError::AccessViolation( AccessType::Store, - u64::MAX, + u64::MAX - 63, 1, "unknown" )), @@ -2134,11 +2133,26 @@ fn test_dynamic_stack_frames_empty() { fn test_dynamic_frame_ptr() { let config = Config::default(); - // Check that upon entering a function (foo) the frame pointer is advanced - // to the top of the stack + // Check that changes to r10 are immediately visible test_interpreter_and_jit_asm!( " - add r11, -8 + add r10, -64 + stxdw [r10+8], r10 + call function_foo + ldxdw r0, [r10+8] + exit + function_foo: + exit", + config.clone(), + [], + TestContextObject::new(6), + ProgramResult::Ok(ebpf::MM_STACK_START + config.stack_size() as u64 - 64), + ); + + // Check that changes to r10 continue to be visible in a callee + test_interpreter_and_jit_asm!( + " + add r10, -64 call function_foo exit function_foo: @@ -2147,18 +2161,17 @@ fn test_dynamic_frame_ptr() { config.clone(), [], TestContextObject::new(5), - ProgramResult::Ok(ebpf::MM_STACK_START + config.stack_size() as u64 - 8), + ProgramResult::Ok(ebpf::MM_STACK_START + config.stack_size() as u64 - 64), ); - // And check that when exiting a function (foo) the caller's frame pointer - // is restored + // And check that changes to r10 are undone after returning test_interpreter_and_jit_asm!( " - add r11, -8 call function_foo mov r0, r10 exit function_foo: + add r10, -64 exit ", config.clone(), diff --git a/tests/verifier.rs 
b/tests/verifier.rs index 6234ba23..d6ff690d 100644 --- a/tests/verifier.rs +++ b/tests/verifier.rs @@ -225,8 +225,8 @@ fn test_verifier_err_invalid_reg_src() { fn test_verifier_resize_stack_ptr_success() { let executable = assemble::( " - add r11, -1 - add r11, 1 + add r10, -64 + add r10, 64 exit", Arc::new(BuiltinProgram::new_loader( Config { @@ -240,6 +240,32 @@ fn test_verifier_resize_stack_ptr_success() { executable.verify::().unwrap(); } +#[test] +#[should_panic(expected = "UnalignedImmediate(0)")] +fn test_verifier_negative_unaligned_stack() { + let executable = assemble::( + " + add r10, -63 + exit", + Arc::new(BuiltinProgram::new_mock()), + ) + .unwrap(); + executable.verify::().unwrap(); +} + +#[test] +#[should_panic(expected = "UnalignedImmediate(0)")] +fn test_verifier_positive_unaligned_stack() { + let executable = assemble::( + " + add r10, 63 + exit", + Arc::new(BuiltinProgram::new_mock()), + ) + .unwrap(); + executable.verify::().unwrap(); +} + #[test] #[should_panic(expected = "JumpToMiddleOfLDDW(2, 0)")] fn test_verifier_err_jmp_lddw() {