-
Notifications
You must be signed in to change notification settings - Fork 14
Loop tiling
Mamy Ratsimbazafy edited this page Nov 10, 2018
·
1 revision
TODO
- Generating Families of Practical Fast Matrix Multiplication Algorithms:
- From FLAME/BLIS, Morton indexing (Z-order curve)
- Strassen Matrix Multiplication
- https://arxiv.org/pdf/1611.01120.pdf
- The effect of reordering multi-dimensional array data on CPU cache utilisation
/*
 * zorder64_inv -- split a 64-bit Morton (Z-order) code into its two
 * 32-bit de-interleaved halves.
 *
 * Syntax: GAS / AT&T. Requires BMI2 (pext).
 * In:     rcx = interleaved 64-bit code
 *         (argument in rcx is consistent with the Microsoft x64
 *         calling convention -- confirm against the caller)
 * Out:    rax = (bits 1,3,...,63 compacted) << 32
 *             | (bits 0,2,...,62 compacted)
 * Clobb:  rcx, rdx, flags
 */
zorder64_inv:
        movabsq $0x5555555555555555, %rdx       # mask selecting every even bit position
        pextq   %rdx, %rcx, %rax                # rax = even-position bits, packed into bits 0..31
        shrq    $1, %rcx                        # slide odd-position bits onto even positions
        pextq   %rdx, %rcx, %rcx                # rcx = odd-position bits, packed into bits 0..31
        shlq    $32, %rcx                       # odd half goes to the upper dword
        orq     %rcx, %rax                      # pext zeroed rax[63:32], so a plain OR merges the halves
        retq
/*
 * zorder64 -- interleave two 32-bit values into one 64-bit Morton
 * (Z-order) code.
 *
 * Syntax: GAS / AT&T. Requires BMI2 (pdep).
 * In:     ecx = value whose bits land on even positions 0,2,...,62
 *         edx = value whose bits land on odd positions 1,3,...,63
 *         (arguments in rcx/rdx are consistent with the Microsoft x64
 *         calling convention -- confirm against the caller)
 * Out:    rax = interleaved 64-bit code
 * Clobb:  rcx, flags
 *
 * The mask has exactly 32 set bits, so pdep consumes only the low
 * 32 bits of each source; the upper halves of rcx/rdx are ignored.
 */
zorder64:
        movabsq $0x5555555555555555, %rax       # mask selecting every even bit position
        pdepq   %rax, %rcx, %rcx                # rcx = ecx bits scattered to even positions
        pdepq   %rax, %rdx, %rax                # rax = edx bits scattered to even positions
        shlq    $1, %rax                        # move them up onto the odd positions
        orq     %rcx, %rax                      # bit sets are disjoint: OR yields the interleave
        retq