-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: James Elliott <[email protected]>
- Loading branch information
1 parent
088cab2
commit 27cc348
Showing
12 changed files
with
4,890 additions
and
475 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,287 @@ | ||
// Copyright 2024 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package main | ||
|
||
import ( | ||
_ "github.com/go-crypt/x/argon2" | ||
. "github.com/mmcloughlin/avo/build" | ||
. "github.com/mmcloughlin/avo/operand" | ||
. "github.com/mmcloughlin/avo/reg" | ||
) | ||
|
||
//go:generate go run . -out ../blamka_amd64.s -pkg argon2 | ||
|
||
func main() { | ||
Package("github.com/go-crypt/x/argon2") | ||
ConstraintExpr("amd64,gc,!purego") | ||
|
||
blamkaSSE4() | ||
mixBlocksSSE2() | ||
xorBlocksSSE2() | ||
Generate() | ||
} | ||
|
||
func blamkaSSE4() { | ||
Implement("blamkaSSE4") | ||
Attributes(NOSPLIT) | ||
AllocLocal(0) | ||
|
||
Load(Param("b"), RAX) | ||
|
||
c40 := c40_DATA() | ||
c48 := c48_DATA() | ||
|
||
MOVOU(c40, X10) | ||
MOVOU(c48, X11) | ||
|
||
BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11) | ||
|
||
BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11) | ||
RET() | ||
} | ||
|
||
func mixBlocksSSE2() { | ||
Implement("mixBlocksSSE2") | ||
Attributes(NOSPLIT) | ||
AllocLocal(0) | ||
|
||
Load(Param("out"), RDX) | ||
Load(Param("a"), RAX) | ||
Load(Param("b"), RBX) | ||
Load(Param("c"), RCX) | ||
MOVQ(U32(128), RDI) | ||
|
||
Label("loop") | ||
MOVOU(Mem{Base: AX}.Offset(0), X0) | ||
MOVOU(Mem{Base: BX}.Offset(0), X1) | ||
MOVOU(Mem{Base: CX}.Offset(0), X2) | ||
PXOR(X1, X0) | ||
PXOR(X2, X0) | ||
MOVOU(X0, Mem{Base: DX}.Offset(0)) | ||
ADDQ(Imm(16), RAX) | ||
ADDQ(Imm(16), RBX) | ||
ADDQ(Imm(16), RCX) | ||
ADDQ(Imm(16), RDX) | ||
SUBQ(Imm(2), RDI) | ||
JA(LabelRef("loop")) | ||
RET() | ||
} | ||
|
||
func xorBlocksSSE2() { | ||
Implement("xorBlocksSSE2") | ||
Attributes(NOSPLIT) | ||
AllocLocal(0) | ||
|
||
Load(Param("out"), RDX) | ||
Load(Param("a"), RAX) | ||
Load(Param("b"), RBX) | ||
Load(Param("c"), RCX) | ||
MOVQ(U32(128), RDI) | ||
|
||
Label("loop") | ||
MOVOU(Mem{Base: AX}.Offset(0), X0) | ||
MOVOU(Mem{Base: BX}.Offset(0), X1) | ||
MOVOU(Mem{Base: CX}.Offset(0), X2) | ||
MOVOU(Mem{Base: DX}.Offset(0), X3) | ||
PXOR(X1, X0) | ||
PXOR(X2, X0) | ||
PXOR(X3, X0) | ||
MOVOU(X0, Mem{Base: DX}.Offset(0)) | ||
ADDQ(Imm(16), RAX) | ||
ADDQ(Imm(16), RBX) | ||
ADDQ(Imm(16), RCX) | ||
ADDQ(Imm(16), RDX) | ||
SUBQ(Imm(2), RDI) | ||
JA(LabelRef("loop")) | ||
RET() | ||
} | ||
|
||
// SHUFFLE emits the register permutation placed between the two HALF_ROUND | ||
// calls of BLAMKA_ROUND_0 and BLAMKA_ROUND_1. | ||
// | ||
// Treating each register pair as four 64-bit lanes (low lane first), the | ||
// emitted sequence swaps v4 with v5, rotates the (v6, v7) pair by one lane | ||
// towards the high end, and rotates the (v2, v3) pair by one lane towards | ||
// the low end. | ||
// | ||
// t1 and t2 are scratch registers and are overwritten. The instruction order | ||
// matters: several steps read a register that a later step overwrites. | ||
func SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2 VecPhysical) { | ||
// Swap v4 and v5 through t1. | ||
MOVO(v4, t1) | ||
MOVO(v5, v4) | ||
MOVO(t1, v5) | ||
// Keep a copy of v6 in t1, then rebuild v6 and v7 from quadword halves. | ||
MOVO(v6, t1) | ||
PUNPCKLQDQ(v6, t2) | ||
PUNPCKHQDQ(v7, v6) | ||
PUNPCKHQDQ(t2, v6) | ||
PUNPCKLQDQ(v7, t2) | ||
MOVO(t1, v7) | ||
// Keep a copy of v2 in t1, then finish v7 and rebuild v2 and v3. | ||
MOVO(v2, t1) | ||
PUNPCKHQDQ(t2, v7) | ||
PUNPCKLQDQ(v3, t2) | ||
PUNPCKHQDQ(t2, v2) | ||
PUNPCKLQDQ(t1, t2) | ||
PUNPCKHQDQ(t2, v3) | ||
} | ||
|
||
func SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2 VecPhysical) { | ||
MOVO(v4, t1) | ||
MOVO(v5, v4) | ||
MOVO(t1, v5) | ||
MOVO(v2, t1) | ||
PUNPCKLQDQ(v2, t2) | ||
PUNPCKHQDQ(v3, v2) | ||
PUNPCKHQDQ(t2, v2) | ||
PUNPCKLQDQ(v3, t2) | ||
MOVO(t1, v3) | ||
MOVO(v6, t1) | ||
PUNPCKHQDQ(t2, v3) | ||
PUNPCKLQDQ(v7, t2) | ||
PUNPCKHQDQ(t2, v6) | ||
PUNPCKLQDQ(t1, t2) | ||
PUNPCKHQDQ(t2, v7) | ||
} | ||
|
||
func HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48 VecPhysical) { | ||
MOVO(v0, t0) | ||
PMULULQ(v2, t0) | ||
PADDQ(v2, v0) | ||
PADDQ(t0, v0) | ||
PADDQ(t0, v0) | ||
PXOR(v0, v6) | ||
PSHUFD(Imm(0xB1), v6, v6) | ||
MOVO(v4, t0) | ||
PMULULQ(v6, t0) | ||
PADDQ(v6, v4) | ||
PADDQ(t0, v4) | ||
PADDQ(t0, v4) | ||
PXOR(v4, v2) | ||
PSHUFB(c40, v2) | ||
MOVO(v0, t0) | ||
PMULULQ(v2, t0) | ||
PADDQ(v2, v0) | ||
PADDQ(t0, v0) | ||
PADDQ(t0, v0) | ||
PXOR(v0, v6) | ||
PSHUFB(c48, v6) | ||
MOVO(v4, t0) | ||
PMULULQ(v6, t0) | ||
PADDQ(v6, v4) | ||
PADDQ(t0, v4) | ||
PADDQ(t0, v4) | ||
PXOR(v4, v2) | ||
MOVO(v2, t0) | ||
PADDQ(v2, t0) | ||
PSRLQ(Imm(63), v2) | ||
PXOR(t0, v2) | ||
MOVO(v1, t0) | ||
PMULULQ(v3, t0) | ||
PADDQ(v3, v1) | ||
PADDQ(t0, v1) | ||
PADDQ(t0, v1) | ||
PXOR(v1, v7) | ||
PSHUFD(Imm(0xB1), v7, v7) | ||
MOVO(v5, t0) | ||
PMULULQ(v7, t0) | ||
PADDQ(v7, v5) | ||
PADDQ(t0, v5) | ||
PADDQ(t0, v5) | ||
PXOR(v5, v3) | ||
PSHUFB(c40, v3) | ||
MOVO(v1, t0) | ||
PMULULQ(v3, t0) | ||
PADDQ(v3, v1) | ||
PADDQ(t0, v1) | ||
PADDQ(t0, v1) | ||
PXOR(v1, v7) | ||
PSHUFB(c48, v7) | ||
MOVO(v5, t0) | ||
PMULULQ(v7, t0) | ||
PADDQ(v7, v5) | ||
PADDQ(t0, v5) | ||
PADDQ(t0, v5) | ||
PXOR(v5, v3) | ||
MOVO(v3, t0) | ||
PADDQ(v3, t0) | ||
PSRLQ(Imm(63), v3) | ||
PXOR(t0, v3) | ||
} | ||
|
||
func LOAD_MSG_0(block GPPhysical, off int) { | ||
var registers = []VecPhysical{X0, X1, X2, X3, X4, X5, X6, X7} | ||
for i, r := range registers { | ||
MOVOU(Mem{Base: block}.Offset(8*(off+(i*2))), r) | ||
} | ||
} | ||
|
||
func STORE_MSG_0(block GPPhysical, off int) { | ||
var registers = []VecPhysical{X0, X1, X2, X3, X4, X5, X6, X7} | ||
for i, r := range registers { | ||
MOVOU(r, Mem{Base: block}.Offset(8*(off+(i*2)))) | ||
} | ||
} | ||
|
||
func LOAD_MSG_1(block GPPhysical, off int) { | ||
var registers = []VecPhysical{X0, X1, X2, X3, X4, X5, X6, X7} | ||
for i, r := range registers { | ||
MOVOU(Mem{Base: block}.Offset(8*off+i*16*8), r) | ||
} | ||
} | ||
|
||
func STORE_MSG_1(block GPPhysical, off int) { | ||
var registers = []VecPhysical{X0, X1, X2, X3, X4, X5, X6, X7} | ||
for i, r := range registers { | ||
MOVOU(r, Mem{Base: block}.Offset(8*off+i*16*8)) | ||
} | ||
} | ||
|
||
func BLAMKA_ROUND_0(block GPPhysical, off int, t0, t1, c40, c48 VecPhysical) { | ||
LOAD_MSG_0(block, off) | ||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48) | ||
SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1) | ||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48) | ||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1) | ||
STORE_MSG_0(block, off) | ||
} | ||
|
||
func BLAMKA_ROUND_1(block GPPhysical, off int, t0, t1, c40, c48 VecPhysical) { | ||
LOAD_MSG_1(block, off) | ||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48) | ||
SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1) | ||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48) | ||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1) | ||
STORE_MSG_1(block, off) | ||
} | ||
|
||
// ##------------------DATA SECTION-------------------## | ||
|
||
var c40_DATA_ptr, c48_DATA_ptr *Mem | ||
|
||
func c40_DATA() Mem { | ||
if c40_DATA_ptr != nil { | ||
return *c40_DATA_ptr | ||
} | ||
|
||
c40_DATA := GLOBL("·c40", NOPTR|RODATA) | ||
c40_DATA_ptr = &c40_DATA | ||
DATA(0x00, U64(0x0201000706050403)) | ||
DATA(0x08, U64(0x0a09080f0e0d0c0b)) | ||
return c40_DATA | ||
} | ||
func c48_DATA() Mem { | ||
if c48_DATA_ptr != nil { | ||
return *c48_DATA_ptr | ||
} | ||
|
||
c48_DATA := GLOBL("·c48", NOPTR|RODATA) | ||
c48_DATA_ptr = &c48_DATA | ||
DATA(0x00, U64(0x0100070605040302)) | ||
DATA(0x08, U64(0x09080f0e0d0c0b0a)) | ||
return c48_DATA | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
module argon2/_asm | ||
|
||
go 1.23 | ||
|
||
require ( | ||
github.com/go-crypt/x v0.2.21 | ||
github.com/mmcloughlin/avo v0.6.0 | ||
) | ||
|
||
require ( | ||
golang.org/x/mod v0.21.0 // indirect | ||
golang.org/x/sync v0.8.0 // indirect | ||
golang.org/x/sys v0.25.0 // indirect | ||
golang.org/x/tools v0.24.0 // indirect | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
github.com/go-crypt/x v0.2.21 h1:nLe8l5zGemmFpaBozaIdUmcvaedEbJHi19hf8hJH5B0= | ||
github.com/go-crypt/x v0.2.21/go.mod h1:7fYWzffOGWTnc6vG1NWaQgU0VQOyD7tryYxpQsFW6Do= | ||
github.com/mmcloughlin/avo v0.6.0 h1:QH6FU8SKoTLaVs80GA8TJuLNkUYl4VokHKlPhVDg4YY= | ||
github.com/mmcloughlin/avo v0.6.0/go.mod h1:8CoAGaCSYXtCPR+8y18Y9aB/kxb8JSS6FRI7mSkvD+8= | ||
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= | ||
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= | ||
golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= | ||
golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= | ||
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= | ||
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= | ||
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= | ||
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= | ||
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= | ||
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= |
Oops, something went wrong.