Skip to content

Commit

Permalink
Support larger stack sizes on aarch64 (#237)
Browse files Browse the repository at this point in the history
Includes additional fixes on top of #236

---------

Co-authored-by: Jacob Parker <[email protected]>
  • Loading branch information
mkeeter and j3parker authored Feb 2, 2025
1 parent 94335a9 commit 9f15579
Show file tree
Hide file tree
Showing 10 changed files with 56 additions and 60 deletions.
2 changes: 1 addition & 1 deletion fidget/src/core/eval/test/float_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ impl<F: Function + MathFunction> TestFloatSlice<F> {
}

pub fn test_f_stress() {
for n in [4, 8, 12, 16, 32] {
for n in [4, 8, 12, 16, 32, 256, 512] {
Self::test_f_stress_n(n);
}
}
Expand Down
7 changes: 1 addition & 6 deletions fidget/src/jit/aarch64/float_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -454,13 +454,8 @@ impl Assembler for FloatSliceAssembler {
; ldr x22, [sp, 0x210]
; ldr x23, [sp, 0x218]
; ldr x24, [sp, 0x220]

// Fix up the stack
; add sp, sp, self.0.mem_offset as u32
; ret
);

self.0.ops.finalize()
self.0.finalize()
}
}

Expand Down
8 changes: 1 addition & 7 deletions fidget/src/jit/aarch64/grad_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -539,14 +539,8 @@ impl Assembler for GradSliceAssembler {
; ldr x21, [sp, 0x208]
; ldr x22, [sp, 0x210]
; ldr x23, [sp, 0x218]

// Fix up the stack
; add sp, sp, self.0.mem_offset as u32
; ret

);

self.0.ops.finalize()
self.0.finalize()
}
}

Expand Down
10 changes: 2 additions & 8 deletions fidget/src/jit/aarch64/interval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,6 @@ impl Assembler for IntervalAssembler {
}

fn finalize(mut self) -> Result<Mmap, Error> {
assert!(self.0.mem_offset < 4096);
if self.0.saved_callee_regs {
dynasm!(self.0.ops
// Restore callee-saved registers
Expand All @@ -762,6 +761,7 @@ impl Assembler for IntervalAssembler {
; ldr x23, [sp, 0xf0]
)
}

dynasm!(self.0.ops
// Restore frame and link register
; ldp x29, x30, [sp, 0x0]
Expand All @@ -771,14 +771,8 @@ impl Assembler for IntervalAssembler {
; ldp d10, d11, [sp, 0x20]
; ldp d12, d13, [sp, 0x30]
; ldp d14, d15, [sp, 0x40]

// Fix up the stack
; add sp, sp, self.0.mem_offset as u32

; ret
);

self.0.ops.finalize()
self.0.finalize()
}
}

Expand Down
8 changes: 1 addition & 7 deletions fidget/src/jit/aarch64/point.rs
Original file line number Diff line number Diff line change
Expand Up @@ -472,14 +472,8 @@ impl Assembler for PointAssembler {
; ldp d10, d11, [sp, 0x20]
; ldp d12, d13, [sp, 0x30]
; ldp d14, d15, [sp, 0x40]

// Fix up the stack
; add sp, sp, (self.0.mem_offset as u32)

; ret
);

self.0.ops.finalize()
self.0.finalize()
}
}

Expand Down
50 changes: 46 additions & 4 deletions fidget/src/jit/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,10 +331,18 @@ impl<T> AssemblerData<T> {

/// Emits the instruction(s) that lower `sp` by `self.mem_offset` bytes.
///
/// NOTE(review): this listing appears to interleave removed and added diff
/// lines without +/- markers; the `assert!` and the unconditional `sub`
/// directly below look like the pre-change body (the file header reports
/// 4 deletions) — confirm against the repository before relying on them.
#[cfg(target_arch = "aarch64")]
fn push_stack(&mut self) {
// Presumably the old guard: only offsets below 4096 were accepted.
assert!(self.mem_offset < 4096);
dynasm!(self.ops
; sub sp, sp, self.mem_offset as u32
);
// Offsets below 4096 are subtracted from `sp` as an immediate operand.
if self.mem_offset < 4096 {
dynasm!(self.ops
; sub sp, sp, self.mem_offset as u32
);
// Offsets below 65536 are first loaded into a register, then subtracted.
} else if self.mem_offset < 65536 {
dynasm!(self.ops
// NOTE(review): w28 is the low half of x28, which AAPCS64 lists as
// callee-saved, while the aarch64 `finalize` below uses w9 as its
// scratch register — confirm x28 is saved/restored around this code.
; mov w28, self.mem_offset as u64
; sub sp, sp, w28
);
} else {
// Anything at or above 65536 is rejected while assembling.
panic!("invalid mem offset: {} is too large", self.mem_offset);
}
}

#[cfg(target_arch = "x86_64")]
Expand All @@ -348,6 +356,40 @@ impl<T> AssemblerData<T> {
assert!(slot >= REGISTER_LIMIT as u32);
(slot - REGISTER_LIMIT as u32) * std::mem::size_of::<T>() as u32
}

/// Emits the tail of the aarch64 function: adds `self.mem_offset` back onto
/// `sp`, emits `ret`, and then finalizes the instruction buffer.
///
/// Returns whatever `self.ops.finalize()` returns.
///
/// # Panics
///
/// Panics if `self.mem_offset` is 65536 or larger.
#[cfg(target_arch = "aarch64")]
fn finalize(mut self) -> Result<Mmap, Error> {
    // Reject offsets that neither of the two encodings below can handle.
    if !(self.mem_offset < 65536) {
        panic!("invalid mem offset: {}", self.mem_offset);
    }

    // Fix up the stack
    if self.mem_offset < 4096 {
        // Small offsets are added to `sp` as an immediate operand.
        dynasm!(self.ops
            ; add sp, sp, self.mem_offset as u32
        );
    } else {
        // Larger offsets are staged through the w9 scratch register first.
        dynasm!(self.ops
            ; mov w9, self.mem_offset as u64
            ; add sp, sp, w9
        );
    }

    // Return to the caller, then hand the buffer off for finalization.
    dynasm!(self.ops
        ; ret
    );
    self.ops.finalize()
}

/// Emits the shared x86_64 function tail (stack release, frame-pointer
/// restore, vector/MMX state reset, `ret`) and then finalizes the
/// instruction buffer.
///
/// Returns whatever `self.ops.finalize()` returns.
#[cfg(target_arch = "x86_64")]
fn finalize(mut self) -> Result<Mmap, Error> {
dynasm!(self.ops
// Add `mem_offset` bytes back onto the stack pointer (presumably undoing
// the reservation made when the function was entered — confirm).
; add rsp, self.mem_offset as i32
// Pop the saved `rbp` value.
; pop rbp
// `emms` marks the x87/MMX register state as empty.
; emms
// `vzeroall` zeroes every vector register.
// NOTE(review): the grad-slice, interval and point epilogues removed
// elsewhere in this change did not include `vzeroall`; confirm that
// zeroing is intended (and ABI-safe) for all evaluators.
; vzeroall
; ret
);
self.ops.finalize()
}
}

////////////////////////////////////////////////////////////////////////////////
Expand Down
8 changes: 1 addition & 7 deletions fidget/src/jit/x86_64/float_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,14 +383,8 @@ impl Assembler for FloatSliceAssembler {

// Finalization code, which happens after all evaluation is complete
; ->X:
; add rsp, self.0.mem_offset as i32
; pop rbp
; emms
; vzeroall
; ret
);

self.0.ops.finalize()
self.0.finalize()
}
}

Expand Down
7 changes: 1 addition & 6 deletions fidget/src/jit/x86_64/grad_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -482,13 +482,8 @@ impl Assembler for GradSliceAssembler {

// Finalization code, which happens after all evaluation is complete
; -> X:
; add rsp, self.0.mem_offset as i32
; pop rbp
; emms
; ret
);

self.0.ops.finalize()
self.0.finalize()
}
}

Expand Down
8 changes: 1 addition & 7 deletions fidget/src/jit/x86_64/interval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -771,13 +771,7 @@ impl Assembler for IntervalAssembler {
; mov r15, [rbp - 0x20]
);
}
dynasm!(self.0.ops
; add rsp, self.0.mem_offset as i32
; pop rbp
; emms
; ret
);
self.0.ops.finalize()
self.0.finalize()
}
}

Expand Down
8 changes: 1 addition & 7 deletions fidget/src/jit/x86_64/point.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,13 +419,7 @@ impl Assembler for PointAssembler {
; mov r15, [rbp - 0x20]
);
}
dynasm!(self.0.ops
; add rsp, self.0.mem_offset as i32
; pop rbp
; emms
; ret
);
self.0.ops.finalize()
self.0.finalize()
}
}

Expand Down

0 comments on commit 9f15579

Please sign in to comment.