From de6eeba0f2f7e593ead75122d534a7884aac029b Mon Sep 17 00:00:00 2001 From: Amin Abdulrahman Date: Tue, 7 Jan 2025 11:36:46 +0100 Subject: [PATCH] Armv7m: Add Dilithium test --- Makefile | 1 + .../basemul_257_asymmetric_dilithium.s | 1 + .../basemul_257_asymmetric_dilithium_opt_m7.s | 1 + .../dilithium-armv7m/basemul_257_dilithium.s | 1 + .../basemul_257_dilithium_opt_m7.s | 1 + asm/manual/dilithium-armv7m/caddq_dilithium.s | 1 + .../dilithium-armv7m/caddq_dilithium_opt_m7.s | 1 + .../dilithium-armv7m/fnt_257_dilithium.s | 1 + .../fnt_257_dilithium_opt_m7.s | 1 + .../dilithium-armv7m/ifnt_257_dilithium.s | 1 + .../ifnt_257_dilithium_opt_m7.s | 1 + .../dilithium-armv7m/intt_769_dilithium.s | 1 + .../intt_769_dilithium_opt_m7.s | 1 + .../intt_dilithium_123_456_78.s | 1 + .../intt_dilithium_123_456_78_opt_m7.s | 1 + .../dilithium-armv7m/ntt_769_dilithium.s | 1 + .../ntt_769_dilithium_opt_m7.s | 1 + asm/manual/dilithium-armv7m/ntt_dilithium.s | 1 + .../dilithium-armv7m/ntt_dilithium_opt_m7.s | 1 + .../pointwise_769_asymmetric_dilithium.s | 1 + ...ointwise_769_asymmetric_dilithium_opt_m7.s | 1 + .../pointwise_769_dilithium.s | 1 + .../pointwise_769_dilithium_opt_m7.s | 1 + .../pointwise_acc_montgomery_dilithium.s | 1 + ...ointwise_acc_montgomery_dilithium_opt_m7.s | 1 + .../pointwise_montgomery_dilithium.s | 1 + .../pointwise_montgomery_dilithium_opt_m7.s | 1 + .../dilithium-armv7m/reduce32_dilithium.s | 1 + .../reduce32_dilithium_opt_m7.s | 1 + tests/dilithium-armv7m/dilithium-armv7m.mk | 45 ++ tests/dilithium-armv7m/fnt.h | 62 ++ tests/dilithium-armv7m/main.c | 738 ++++++++++++++++++ tests/dilithium-armv7m/ntt_769.h | 64 ++ tests/dilithium-armv7m/ref.c | 109 +++ tests/dilithium-armv7m/ref.h | 13 + 35 files changed, 1060 insertions(+) create mode 120000 asm/manual/dilithium-armv7m/basemul_257_asymmetric_dilithium.s create mode 120000 asm/manual/dilithium-armv7m/basemul_257_asymmetric_dilithium_opt_m7.s create mode 120000 
asm/manual/dilithium-armv7m/basemul_257_dilithium.s create mode 120000 asm/manual/dilithium-armv7m/basemul_257_dilithium_opt_m7.s create mode 120000 asm/manual/dilithium-armv7m/caddq_dilithium.s create mode 120000 asm/manual/dilithium-armv7m/caddq_dilithium_opt_m7.s create mode 120000 asm/manual/dilithium-armv7m/fnt_257_dilithium.s create mode 120000 asm/manual/dilithium-armv7m/fnt_257_dilithium_opt_m7.s create mode 120000 asm/manual/dilithium-armv7m/ifnt_257_dilithium.s create mode 120000 asm/manual/dilithium-armv7m/ifnt_257_dilithium_opt_m7.s create mode 120000 asm/manual/dilithium-armv7m/intt_769_dilithium.s create mode 120000 asm/manual/dilithium-armv7m/intt_769_dilithium_opt_m7.s create mode 120000 asm/manual/dilithium-armv7m/intt_dilithium_123_456_78.s create mode 120000 asm/manual/dilithium-armv7m/intt_dilithium_123_456_78_opt_m7.s create mode 120000 asm/manual/dilithium-armv7m/ntt_769_dilithium.s create mode 120000 asm/manual/dilithium-armv7m/ntt_769_dilithium_opt_m7.s create mode 120000 asm/manual/dilithium-armv7m/ntt_dilithium.s create mode 120000 asm/manual/dilithium-armv7m/ntt_dilithium_opt_m7.s create mode 120000 asm/manual/dilithium-armv7m/pointwise_769_asymmetric_dilithium.s create mode 120000 asm/manual/dilithium-armv7m/pointwise_769_asymmetric_dilithium_opt_m7.s create mode 120000 asm/manual/dilithium-armv7m/pointwise_769_dilithium.s create mode 120000 asm/manual/dilithium-armv7m/pointwise_769_dilithium_opt_m7.s create mode 120000 asm/manual/dilithium-armv7m/pointwise_acc_montgomery_dilithium.s create mode 120000 asm/manual/dilithium-armv7m/pointwise_acc_montgomery_dilithium_opt_m7.s create mode 120000 asm/manual/dilithium-armv7m/pointwise_montgomery_dilithium.s create mode 120000 asm/manual/dilithium-armv7m/pointwise_montgomery_dilithium_opt_m7.s create mode 120000 asm/manual/dilithium-armv7m/reduce32_dilithium.s create mode 120000 asm/manual/dilithium-armv7m/reduce32_dilithium_opt_m7.s create mode 100644 tests/dilithium-armv7m/dilithium-armv7m.mk 
create mode 100644 tests/dilithium-armv7m/fnt.h create mode 100644 tests/dilithium-armv7m/main.c create mode 100644 tests/dilithium-armv7m/ntt_769.h create mode 100644 tests/dilithium-armv7m/ref.c create mode 100644 tests/dilithium-armv7m/ref.h diff --git a/Makefile b/Makefile index a95e00f..b9b7126 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,7 @@ include tests/chunk/chunk.mk include tests/crt/crt.mk include tests/ct/ct.mk +include tests/dilithium-armv7m/dilithium-armv7m.mk include tests/flt-fft/flt-fft.mk include tests/fx-fft/fx-fft.mk include tests/helloworld/helloworld.mk diff --git a/asm/manual/dilithium-armv7m/basemul_257_asymmetric_dilithium.s b/asm/manual/dilithium-armv7m/basemul_257_asymmetric_dilithium.s new file mode 120000 index 0000000..879d2dc --- /dev/null +++ b/asm/manual/dilithium-armv7m/basemul_257_asymmetric_dilithium.s @@ -0,0 +1 @@ +../../../slothy/examples/naive/armv7m/basemul_257_asymmetric_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/basemul_257_asymmetric_dilithium_opt_m7.s b/asm/manual/dilithium-armv7m/basemul_257_asymmetric_dilithium_opt_m7.s new file mode 120000 index 0000000..573a4c0 --- /dev/null +++ b/asm/manual/dilithium-armv7m/basemul_257_asymmetric_dilithium_opt_m7.s @@ -0,0 +1 @@ +../../../slothy/examples/opt/armv7m/basemul_257_asymmetric_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/basemul_257_dilithium.s b/asm/manual/dilithium-armv7m/basemul_257_dilithium.s new file mode 120000 index 0000000..8d4260d --- /dev/null +++ b/asm/manual/dilithium-armv7m/basemul_257_dilithium.s @@ -0,0 +1 @@ +../../../slothy/examples/naive/armv7m/basemul_257_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/basemul_257_dilithium_opt_m7.s b/asm/manual/dilithium-armv7m/basemul_257_dilithium_opt_m7.s new file mode 120000 index 0000000..aaa1c4d --- /dev/null +++ b/asm/manual/dilithium-armv7m/basemul_257_dilithium_opt_m7.s @@ -0,0 +1 @@ 
+../../../slothy/examples/opt/armv7m/basemul_257_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/caddq_dilithium.s b/asm/manual/dilithium-armv7m/caddq_dilithium.s new file mode 120000 index 0000000..8c04abc --- /dev/null +++ b/asm/manual/dilithium-armv7m/caddq_dilithium.s @@ -0,0 +1 @@ +../../../slothy/examples/naive/armv7m/caddq_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/caddq_dilithium_opt_m7.s b/asm/manual/dilithium-armv7m/caddq_dilithium_opt_m7.s new file mode 120000 index 0000000..4439894 --- /dev/null +++ b/asm/manual/dilithium-armv7m/caddq_dilithium_opt_m7.s @@ -0,0 +1 @@ +../../../slothy/examples/opt/armv7m/caddq_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/fnt_257_dilithium.s b/asm/manual/dilithium-armv7m/fnt_257_dilithium.s new file mode 120000 index 0000000..7293bd1 --- /dev/null +++ b/asm/manual/dilithium-armv7m/fnt_257_dilithium.s @@ -0,0 +1 @@ +../../../slothy/examples/naive/armv7m/fnt_257_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/fnt_257_dilithium_opt_m7.s b/asm/manual/dilithium-armv7m/fnt_257_dilithium_opt_m7.s new file mode 120000 index 0000000..480a072 --- /dev/null +++ b/asm/manual/dilithium-armv7m/fnt_257_dilithium_opt_m7.s @@ -0,0 +1 @@ +../../../slothy/examples/opt/armv7m/fnt_257_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/ifnt_257_dilithium.s b/asm/manual/dilithium-armv7m/ifnt_257_dilithium.s new file mode 120000 index 0000000..2f6df66 --- /dev/null +++ b/asm/manual/dilithium-armv7m/ifnt_257_dilithium.s @@ -0,0 +1 @@ +../../../slothy/examples/naive/armv7m/ifnt_257_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/ifnt_257_dilithium_opt_m7.s b/asm/manual/dilithium-armv7m/ifnt_257_dilithium_opt_m7.s new file mode 120000 index 0000000..e44782e --- /dev/null +++ 
b/asm/manual/dilithium-armv7m/ifnt_257_dilithium_opt_m7.s @@ -0,0 +1 @@ +../../../slothy/examples/opt/armv7m/ifnt_257_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/intt_769_dilithium.s b/asm/manual/dilithium-armv7m/intt_769_dilithium.s new file mode 120000 index 0000000..ee9d29d --- /dev/null +++ b/asm/manual/dilithium-armv7m/intt_769_dilithium.s @@ -0,0 +1 @@ +../../../slothy/examples/naive/armv7m/intt_769_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/intt_769_dilithium_opt_m7.s b/asm/manual/dilithium-armv7m/intt_769_dilithium_opt_m7.s new file mode 120000 index 0000000..f3367a0 --- /dev/null +++ b/asm/manual/dilithium-armv7m/intt_769_dilithium_opt_m7.s @@ -0,0 +1 @@ +../../../slothy/examples/opt/armv7m/intt_769_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/intt_dilithium_123_456_78.s b/asm/manual/dilithium-armv7m/intt_dilithium_123_456_78.s new file mode 120000 index 0000000..302a9db --- /dev/null +++ b/asm/manual/dilithium-armv7m/intt_dilithium_123_456_78.s @@ -0,0 +1 @@ +../../../slothy/examples/naive/armv7m/intt_dilithium_123_456_78.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/intt_dilithium_123_456_78_opt_m7.s b/asm/manual/dilithium-armv7m/intt_dilithium_123_456_78_opt_m7.s new file mode 120000 index 0000000..246aa56 --- /dev/null +++ b/asm/manual/dilithium-armv7m/intt_dilithium_123_456_78_opt_m7.s @@ -0,0 +1 @@ +../../../slothy/examples/opt/armv7m/intt_dilithium_123_456_78_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/ntt_769_dilithium.s b/asm/manual/dilithium-armv7m/ntt_769_dilithium.s new file mode 120000 index 0000000..849ac2b --- /dev/null +++ b/asm/manual/dilithium-armv7m/ntt_769_dilithium.s @@ -0,0 +1 @@ +../../../slothy/examples/naive/armv7m/ntt_769_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/ntt_769_dilithium_opt_m7.s 
b/asm/manual/dilithium-armv7m/ntt_769_dilithium_opt_m7.s new file mode 120000 index 0000000..c2bf398 --- /dev/null +++ b/asm/manual/dilithium-armv7m/ntt_769_dilithium_opt_m7.s @@ -0,0 +1 @@ +../../../slothy/examples/opt/armv7m/ntt_769_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/ntt_dilithium.s b/asm/manual/dilithium-armv7m/ntt_dilithium.s new file mode 120000 index 0000000..c25f73b --- /dev/null +++ b/asm/manual/dilithium-armv7m/ntt_dilithium.s @@ -0,0 +1 @@ +../../../slothy/examples/naive/armv7m/ntt_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/ntt_dilithium_opt_m7.s b/asm/manual/dilithium-armv7m/ntt_dilithium_opt_m7.s new file mode 120000 index 0000000..4464415 --- /dev/null +++ b/asm/manual/dilithium-armv7m/ntt_dilithium_opt_m7.s @@ -0,0 +1 @@ +../../../slothy/examples/opt/armv7m/ntt_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/pointwise_769_asymmetric_dilithium.s b/asm/manual/dilithium-armv7m/pointwise_769_asymmetric_dilithium.s new file mode 120000 index 0000000..91096c8 --- /dev/null +++ b/asm/manual/dilithium-armv7m/pointwise_769_asymmetric_dilithium.s @@ -0,0 +1 @@ +../../../slothy/examples/naive/armv7m/pointwise_769_asymmetric_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/pointwise_769_asymmetric_dilithium_opt_m7.s b/asm/manual/dilithium-armv7m/pointwise_769_asymmetric_dilithium_opt_m7.s new file mode 120000 index 0000000..0b9838e --- /dev/null +++ b/asm/manual/dilithium-armv7m/pointwise_769_asymmetric_dilithium_opt_m7.s @@ -0,0 +1 @@ +../../../slothy/examples/opt/armv7m/pointwise_769_asymmetric_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/pointwise_769_dilithium.s b/asm/manual/dilithium-armv7m/pointwise_769_dilithium.s new file mode 120000 index 0000000..70e2a9a --- /dev/null +++ b/asm/manual/dilithium-armv7m/pointwise_769_dilithium.s @@ -0,0 +1 @@ 
+../../../slothy/examples/naive/armv7m/pointwise_769_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/pointwise_769_dilithium_opt_m7.s b/asm/manual/dilithium-armv7m/pointwise_769_dilithium_opt_m7.s new file mode 120000 index 0000000..6f650c0 --- /dev/null +++ b/asm/manual/dilithium-armv7m/pointwise_769_dilithium_opt_m7.s @@ -0,0 +1 @@ +../../../slothy/examples/opt/armv7m/pointwise_769_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/pointwise_acc_montgomery_dilithium.s b/asm/manual/dilithium-armv7m/pointwise_acc_montgomery_dilithium.s new file mode 120000 index 0000000..c480c60 --- /dev/null +++ b/asm/manual/dilithium-armv7m/pointwise_acc_montgomery_dilithium.s @@ -0,0 +1 @@ +../../../slothy/examples/naive/armv7m/pointwise_acc_montgomery_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/pointwise_acc_montgomery_dilithium_opt_m7.s b/asm/manual/dilithium-armv7m/pointwise_acc_montgomery_dilithium_opt_m7.s new file mode 120000 index 0000000..7f6d7e5 --- /dev/null +++ b/asm/manual/dilithium-armv7m/pointwise_acc_montgomery_dilithium_opt_m7.s @@ -0,0 +1 @@ +../../../slothy/examples/opt/armv7m/pointwise_acc_montgomery_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/pointwise_montgomery_dilithium.s b/asm/manual/dilithium-armv7m/pointwise_montgomery_dilithium.s new file mode 120000 index 0000000..b232539 --- /dev/null +++ b/asm/manual/dilithium-armv7m/pointwise_montgomery_dilithium.s @@ -0,0 +1 @@ +../../../slothy/examples/naive/armv7m/pointwise_montgomery_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/pointwise_montgomery_dilithium_opt_m7.s b/asm/manual/dilithium-armv7m/pointwise_montgomery_dilithium_opt_m7.s new file mode 120000 index 0000000..55bcdb9 --- /dev/null +++ b/asm/manual/dilithium-armv7m/pointwise_montgomery_dilithium_opt_m7.s @@ -0,0 +1 @@ 
+../../../slothy/examples/opt/armv7m/pointwise_montgomery_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/reduce32_dilithium.s b/asm/manual/dilithium-armv7m/reduce32_dilithium.s new file mode 120000 index 0000000..7947265 --- /dev/null +++ b/asm/manual/dilithium-armv7m/reduce32_dilithium.s @@ -0,0 +1 @@ +../../../slothy/examples/naive/armv7m/reduce32_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-armv7m/reduce32_dilithium_opt_m7.s b/asm/manual/dilithium-armv7m/reduce32_dilithium_opt_m7.s new file mode 120000 index 0000000..b006f59 --- /dev/null +++ b/asm/manual/dilithium-armv7m/reduce32_dilithium_opt_m7.s @@ -0,0 +1 @@ +../../../slothy/examples/opt/armv7m/reduce32_dilithium_opt_m7.s \ No newline at end of file diff --git a/tests/dilithium-armv7m/dilithium-armv7m.mk b/tests/dilithium-armv7m/dilithium-armv7m.mk new file mode 100644 index 0000000..657ed81 --- /dev/null +++ b/tests/dilithium-armv7m/dilithium-armv7m.mk @@ -0,0 +1,45 @@ +# Test name - needs to match the directory name +TESTS += dilithium-armv7m + +# All further variables must be prefixed with the capitalized test name + +# Platforms this test should run on (matching the directory name in envs/) +DILITHIUM_ARMV7M_PLATFORMS += m7-an500 +DILITHIUM_ARMV7M_PLATFORMS += nucleo-f767zi +DILITHIUM_ARMV7M_PLATFORMS += stm32f4discovery + +# C sources required for this test +DILITHIUM_ARMV7M_SOURCES += main.c +DILITHIUM_ARMV7M_SOURCES += ref.c + +# Assembly sources required for this test +DILITHIUM_ARMV7M_ASM_DIR = ../../asm/manual/dilithium-armv7m +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/basemul_257_asymmetric_dilithium.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/basemul_257_dilithium.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/caddq_dilithium.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/fnt_257_dilithium.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/ifnt_257_dilithium.s 
+DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/intt_769_dilithium.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/ntt_769_dilithium.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/ntt_dilithium.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/intt_dilithium_123_456_78.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/pointwise_769_asymmetric_dilithium.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/pointwise_769_dilithium.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/pointwise_acc_montgomery_dilithium.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/pointwise_montgomery_dilithium.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/reduce32_dilithium.s + +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/basemul_257_asymmetric_dilithium_opt_m7.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/basemul_257_dilithium_opt_m7.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/caddq_dilithium_opt_m7.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/fnt_257_dilithium_opt_m7.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/ifnt_257_dilithium_opt_m7.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/intt_769_dilithium_opt_m7.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/ntt_769_dilithium_opt_m7.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/ntt_dilithium_opt_m7.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/intt_dilithium_123_456_78_opt_m7.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/pointwise_769_asymmetric_dilithium_opt_m7.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/pointwise_769_dilithium_opt_m7.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/pointwise_acc_montgomery_dilithium_opt_m7.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/pointwise_montgomery_dilithium_opt_m7.s +DILITHIUM_ARMV7M_ASMS += $(DILITHIUM_ARMV7M_ASM_DIR)/reduce32_dilithium_opt_m7.s diff --git a/tests/dilithium-armv7m/fnt.h 
b/tests/dilithium-armv7m/fnt.h new file mode 100644 index 0000000..dfbce47 --- /dev/null +++ b/tests/dilithium-armv7m/fnt.h @@ -0,0 +1,62 @@ +#ifndef FNT_H +#define FNT_H + +#include + +#define FNT_Q 257 +#define FNT_Q_PRIME (16711935) // -q^-1 mod 2**32 + +static const int32_t twiddles_ntt_257_streamlined[] __attribute__((aligned(8))) = {-60, -35, -46, -42, 99, 89, -118, 27, -82, 108, -71, 54, 93, -41, 115, 68, 117, 73, -84, -59, -79, 21, -78, 37, -55, -109, 101, 74, -110, 39, 17, -70, -92, -50, -29, 57, -116, 83, 43, 75, -85, -91, 86, -107, 87, 15, -23, -111, -100, -58, 114, 25, -97, -10, 126, -40, 63, -20, -5, -80, -120, 44, -67, -72, -124, -31, 18, -106, 103, 90, -102, 45, -51, -77, 53, -121, -81, -11, 113, 9, -62, 36, -65, -12, -3, -48, 127, -24, -6, -96, 34, 88, 123, -49, -13, 61, -52, 112, -7, -66, -28, -33, -14, 125, -56, 30, 95, -22, -98, -26, 122, -104, -38, -94, 105, -119, -76, 69, -47, 19}; +static const int32_t twiddles_intt_257_streamlined[] __attribute__((aligned(8))) = { -19, 47, -69, 76, 119, -105, 94, 38, 104, -122, 26, 98, 22, -95, -30, 56, -125, 14, 33, 28, 66, 7, -112, 52, -61, 13, 49, -123, -88, -34, 96, 6, 24, -127, 48, 3, 12, 65, -36, 62, -9, -113, 11, 81, 121, -53, 77, 51, -45, 102, -90, -103, 106, -18, 31, 124, 72, 67, -44, 120, 80, 5, 20, -63, 40, -126, 10, 97, -25, -114, 58, 100, 111, 23, -15, -87, 107, -86, 91, 85, -75, -43, -83, 116, -57, 29, 50, 92, 70, -17, -39, 110, -74, -101, 109, 55, -37, 78, -21, 79, 59, 84, -73, -117, -68, -115, 41, -93, -54, 71, -108, 82, -27, 118, -89, -99, 42, 46, 35, 60}; +static const int32_t twiddles_basemul_257[] __attribute__((aligned(8))) = {27, -82, 108, -71, 54, 93, -41, 115, -78, 37, -55, -109, 101, 74, -110, 39, 83, 43, 75, -85, -91, 86, -107, 87, -97, -10, 126, -40, 63, -20, -5, -80, -106, 103, 90, -102, 45, -51, -77, 53, -65, -12, -3, -48, 127, -24, -6, -96, 112, -7, -66, -28, -33, -14, 125, -56, -38, -94, 105, -119, -76, 69, -47, 19}; + + +// inputs in [-2, 2]; outputs in [-128, +128] +void 
__asm_fnt_257(int32_t *p, const int32_t twiddles[112], int32_t qprime, int32_t q);
+
+void __asm_point_mul_257_16(int16_t p_prime[128], const int32_t p[256], int32_t qprime, int32_t q, const int32_t twiddles[64]);
+void __asm_asymmetric_mul_257_16(int32_t c[256], const int32_t a[256], const int32_t b[256], const int16_t b_prime[128]);
+
+// inputs in [-32768, 32768] outputs in [-128, +128]
+void __asm_ifnt_257(int32_t *p, const int32_t twiddles[112], int32_t qprime, int32_t q);
+
+/* Wrappers binding the Q=257 assembly routines above to this header's tables
+ * and to FNT_Q_PRIME / FNT_Q. They are static inline because a header must not
+ * define functions with external linkage: each further translation unit that
+ * includes it would otherwise emit a duplicate symbol at link time. */
+static inline void fnt_ntt(int32_t *a) {
+    __asm_fnt_257(a, twiddles_ntt_257_streamlined, FNT_Q_PRIME, FNT_Q);
+}
+static inline void fnt_invntt_tomont(int32_t *a) {
+    __asm_ifnt_257(a, twiddles_intt_257_streamlined, FNT_Q_PRIME, FNT_Q);
+}
+/* b_prime is the output buffer; b is declared const by the assembly prototype. */
+static inline void fnt_point_mul(int16_t *b_prime, int32_t *b) {
+    __asm_point_mul_257_16(b_prime, b, FNT_Q_PRIME, FNT_Q, twiddles_basemul_257);
+}
+/* c is the output buffer; a, b and b_prime are declared const by the prototype. */
+static inline void fnt_asymmetric_mul(int32_t *c, int32_t *a, int32_t *b, int16_t *b_prime) {
+    __asm_asymmetric_mul_257_16(c, a, b, b_prime);
+}
+
+/* Same four entry points, provided by the *_opt_m7 assembly files. */
+void __asm_fnt_257_opt_m7(int32_t *p, const int32_t twiddles[112], int32_t qprime, int32_t q);
+void __asm_point_mul_257_16_opt_m7(int16_t p_prime[128], const int32_t p[256], int32_t qprime, int32_t q, const int32_t twiddles[64]);
+void __asm_asymmetric_mul_257_16_opt_m7(int32_t c[256], const int32_t a[256], const int32_t b[256], const int16_t b_prime[128]);
+void __asm_ifnt_257_opt_m7(int32_t *p, const int32_t twiddles[112], int32_t qprime, int32_t q);
+
+/* Wrappers for the *_opt_m7 routines; same tables and constants as above. */
+static inline void fnt_ntt_opt_m7(int32_t *a) {
+    __asm_fnt_257_opt_m7(a, twiddles_ntt_257_streamlined, FNT_Q_PRIME, FNT_Q);
+}
+static inline void fnt_invntt_tomont_opt_m7(int32_t *a) {
+    __asm_ifnt_257_opt_m7(a, twiddles_intt_257_streamlined, FNT_Q_PRIME, FNT_Q);
+}
+static inline void fnt_point_mul_opt_m7(int16_t *b_prime, int32_t *b) {
+    __asm_point_mul_257_16_opt_m7(b_prime, b, FNT_Q_PRIME, FNT_Q, twiddles_basemul_257);
+}
+static inline void fnt_asymmetric_mul_opt_m7(int32_t *c, int32_t *a, int32_t *b, int16_t *b_prime) {
+    __asm_asymmetric_mul_257_16_opt_m7(c, a, b, b_prime);
+}
+
+#endif
\ No
newline at end of file diff --git a/tests/dilithium-armv7m/main.c b/tests/dilithium-armv7m/main.c new file mode 100644 index 0000000..edaef98 --- /dev/null +++ b/tests/dilithium-armv7m/main.c @@ -0,0 +1,738 @@ +/* + * Copyright (c) 2021 Arm Limited + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * + * Author: Hanno Becker + */ + +#define ENABLE_PMU_STATS /* Do not enable when benching for cycle count */ + +#if defined(ENABLE_PMU_STATS) +#define REPEAT 100 +#define REPEAT_MEDIAN 100 +#else +#define REPEAT 1024 +#endif + +/* + * Some external references to auto-generated assembly. 
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include "misc.h"
+#include "poly.h"
+#include "ref.h"
+#include "fnt.h"
+#include "ntt_769.h"
+
+void pqcrystals_dilithium_ntt(int32_t *);
+void pqcrystals_dilithium_ntt_opt_m7(int32_t *);
+
+void pqcrystals_dilithium_invntt_tomont(int32_t *);
+void pqcrystals_dilithium_invntt_tomont_opt_m7(int32_t *);
+
+void pqcrystals_dilithium_asm_caddq(int32_t *);
+void pqcrystals_dilithium_asm_caddq_opt_m7(int32_t *);
+
+void pqcrystals_dilithium_asm_pointwise_acc_montgomery(int32_t *, int32_t *, int32_t *);
+void pqcrystals_dilithium_asm_pointwise_acc_montgomery_opt_m7(int32_t *, int32_t *, int32_t *);
+
+void pqcrystals_dilithium_asm_pointwise_montgomery(int32_t *, int32_t *, int32_t *);
+void pqcrystals_dilithium_asm_pointwise_montgomery_opt_m7(int32_t *, int32_t *, int32_t *);
+
+void pqcrystals_dilithium_asm_reduce32(int32_t *);
+void pqcrystals_dilithium_asm_reduce32_opt_m7(int32_t *);
+
+#define NTT_LAYERS 8
+#define NTT_SIZE (1u << NTT_LAYERS)
+#define NTT_ROOT_ORDER (2 * NTT_SIZE)
+#define NTT_INCOMPLETE_LAYERS 8
+#define NTT_INCOMPLETE_SIZE (1u << NTT_INCOMPLETE_LAYERS)
+#define NTT_LAYER_GAP ( NTT_LAYERS - NTT_INCOMPLETE_LAYERS )
+#define NTT_LAYER_STRIDE (1u << NTT_LAYER_GAP )
+
+
+/* One collected measurement: the benchmark's name and its cycle count. */
+typedef struct {
+    char name[100];
+    uint64_t cycles;
+} benchmark_result;
+
+/* Results in the order they were recorded; benchmark_cnt is the fill level. */
+benchmark_result results[100];
+int benchmark_cnt = 0;
+
+/*
+ * Append one measurement to results[].
+ *
+ * name:   label to store; truncated to fit results[].name.
+ * cycles: value to store alongside it.
+ *
+ * The call is silently ignored once results[] is full.
+ */
+static void add_benchmark_results(const char *name, uint64_t cycles){
+    const int capacity = (int)(sizeof results / sizeof results[0]);
+    benchmark_result *slot;
+
+    if(benchmark_cnt >= capacity) return;
+
+    slot = &results[benchmark_cnt];
+    slot->cycles = cycles;
+    /* strncpy() leaves the destination unterminated when the source fills
+     * it, so copy one byte less and terminate explicitly. */
+    strncpy(slot->name, name, sizeof slot->name - 1);
+    slot->name[sizeof slot->name - 1] = '\0';
+    benchmark_cnt++;
+}
+
+/* NOTE(review): the text between "i" and "> 1]))" below appears to have been
+ * lost before this patch reached review: MAKE_BENCH_NTT, cmp_uint64_t and the
+ * test_* functions are used further down but are not defined in the visible
+ * text. The damaged fragment is kept unchanged - restore it from the original
+ * commit. */
+static void dump_benchmarks_tex(void){
+    for(int i=0;i> 1])); \
+        add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \
+        return (0); \
+    }
+
+/*
+ * MAKE_BENCH_*(var, func) define `int bench_<var>(void)`. Each one:
+ *   - calls func once before timing starts,
+ *   - runs REPEAT_MEDIAN rounds of REPEAT calls, reading hal_get_time()
+ *     around every round and storing (t2 - t1) / REPEAT for that round,
+ *   - sorts the rounds and reports cycles[REPEAT_MEDIAN >> 1] through
+ *     debug_printf() and add_benchmark_results(), then returns 0.
+ * The variants differ only in the number and element type of the buffers
+ * handed to func. All buffers are zero-initialised so every run measures the
+ * same input. The reported value is narrowed to int because the format
+ * string uses %d, while cycles[] holds uint64_t.
+ */
+#define MAKE_BENCH_BASEMUL(var, func) \
+    int bench_##var(void) \
+    { \
+        uint64_t t1, t2; \
+        uint64_t cycles[REPEAT_MEDIAN]; \
+        int32_t src[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        int32_t src1[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        int32_t src2[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        (func)(src, src1, src2); \
+        for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \
+        { \
+            t1 = hal_get_time(); \
+            for (size_t cnt = 0; cnt < REPEAT; cnt++) \
+                (func)(src, src1, src2); \
+            t2 = hal_get_time(); \
+            cycles[cnt_median] = (t2 - t1) / REPEAT; \
+        } \
+        qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \
+        debug_printf(#var " repeat %d, %d", \
+                     REPEAT *REPEAT_MEDIAN, (int)(cycles[REPEAT_MEDIAN >> 1])); \
+        add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \
+        return (0); \
+    }
+
+#define MAKE_BENCH_POINTMUL(var, func) \
+    int bench_##var(void) \
+    { \
+        uint64_t t1, t2; \
+        uint64_t cycles[REPEAT_MEDIAN]; \
+        int16_t src[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        int32_t src1[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        (func)(src, src1); \
+        for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \
+        { \
+            t1 = hal_get_time(); \
+            for (size_t cnt = 0; cnt < REPEAT; cnt++) \
+                (func)(src, src1); \
+            t2 = hal_get_time(); \
+            cycles[cnt_median] = (t2 - t1) / REPEAT; \
+        } \
+        qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \
+        debug_printf(#var " repeat %d, %d", \
+                     REPEAT *REPEAT_MEDIAN, (int)(cycles[REPEAT_MEDIAN >> 1])); \
+        add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \
+        return (0); \
+    }
+
+#define MAKE_BENCH_ASYM_MUL(var, func) \
+    int bench_##var(void) \
+    { \
+        uint64_t t1, t2; \
+        uint64_t cycles[REPEAT_MEDIAN]; \
+        int32_t src[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        int32_t src1[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        int32_t src2[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        int16_t src3[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        (func)(src, src1, src2, src3); \
+        for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \
+        { \
+            t1 = hal_get_time(); \
+            for (size_t cnt = 0; cnt < REPEAT; cnt++) \
+                (func)(src, src1, src2, src3); \
+            t2 = hal_get_time(); \
+            cycles[cnt_median] = (t2 - t1) / REPEAT; \
+        } \
+        qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \
+        debug_printf(#var " repeat %d, %d", \
+                     REPEAT *REPEAT_MEDIAN, (int)(cycles[REPEAT_MEDIAN >> 1])); \
+        add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \
+        return (0); \
+    }
+
+#define MAKE_BENCH_NTT_16(var, func) \
+    int bench_##var(void) \
+    { \
+        uint64_t t1, t2; \
+        uint64_t cycles[REPEAT_MEDIAN]; \
+        int16_t src[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        (func)(src); \
+        for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \
+        { \
+            t1 = hal_get_time(); \
+            for (size_t cnt = 0; cnt < REPEAT; cnt++) \
+                (func)(src); \
+            t2 = hal_get_time(); \
+            cycles[cnt_median] = (t2 - t1) / REPEAT; \
+        } \
+        qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \
+        debug_printf(#var " repeat %d, %d", \
+                     REPEAT *REPEAT_MEDIAN, (int)(cycles[REPEAT_MEDIAN >> 1])); \
+        add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \
+        return (0); \
+    }
+
+#define MAKE_BENCH_POINTMUL_16(var, func) \
+    int bench_##var(void) \
+    { \
+        uint64_t t1, t2; \
+        uint64_t cycles[REPEAT_MEDIAN]; \
+        int16_t src[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        int16_t src1[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        (func)(src, src1); \
+        for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \
+        { \
+            t1 = hal_get_time(); \
+            for (size_t cnt = 0; cnt < REPEAT; cnt++) \
+                (func)(src, src1); \
+            t2 = hal_get_time(); \
+            cycles[cnt_median] = (t2 - t1) / REPEAT; \
+        } \
+        qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \
+        debug_printf(#var " repeat %d, %d", \
+                     REPEAT *REPEAT_MEDIAN, (int)(cycles[REPEAT_MEDIAN >> 1])); \
+        add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \
+        return (0); \
+    }
+
+#define MAKE_BENCH_ASYM_MUL_16(var, func) \
+    int bench_##var(void) \
+    { \
+        uint64_t t1, t2; \
+        uint64_t cycles[REPEAT_MEDIAN]; \
+        int16_t src[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        int16_t src1[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        int16_t src2[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        int16_t src3[NTT_SIZE] __attribute__((aligned(16))) = {0}; \
+        (func)(src, src1, src2, src3); \
+        for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \
+        { \
+            t1 = hal_get_time(); \
+            for (size_t cnt = 0; cnt < REPEAT; cnt++) \
+                (func)(src, src1, src2, src3); \
+            t2 = hal_get_time(); \
+            cycles[cnt_median] = (t2 - t1) / REPEAT; \
+        } \
+        qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \
+        debug_printf(#var " repeat %d, %d", \
+                     REPEAT *REPEAT_MEDIAN, (int)(cycles[REPEAT_MEDIAN >> 1])); \
+        add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \
+        return (0); \
+    }
+
+// Q=8380417 polynomial multiplication
+MAKE_BENCH_NTT(pqcrystals_dilithium_ntt,pqcrystals_dilithium_ntt)
+MAKE_BENCH_NTT(pqcrystals_dilithium_ntt_opt_m7,pqcrystals_dilithium_ntt_opt_m7)
+
+MAKE_BENCH_NTT(pqcrystals_dilithium_invntt_tomont,pqcrystals_dilithium_invntt_tomont)
+MAKE_BENCH_NTT(pqcrystals_dilithium_invntt_tomont_opt_m7,pqcrystals_dilithium_invntt_tomont_opt_m7)
+
+MAKE_BENCH_BASEMUL(pqcrystals_dilithium_asm_pointwise_montgomery,pqcrystals_dilithium_asm_pointwise_montgomery)
+MAKE_BENCH_BASEMUL(pqcrystals_dilithium_asm_pointwise_montgomery_opt_m7,pqcrystals_dilithium_asm_pointwise_montgomery_opt_m7)
+
+MAKE_BENCH_BASEMUL(pqcrystals_dilithium_asm_pointwise_acc_montgomery,pqcrystals_dilithium_asm_pointwise_acc_montgomery)
+MAKE_BENCH_BASEMUL(pqcrystals_dilithium_asm_pointwise_acc_montgomery_opt_m7,pqcrystals_dilithium_asm_pointwise_acc_montgomery_opt_m7)
+
+// Q=257 polynomial multiplication
+MAKE_BENCH_NTT(__asm_fnt_257,fnt_ntt)
+MAKE_BENCH_NTT(__asm_fnt_257_opt_m7,fnt_ntt_opt_m7)
+
+MAKE_BENCH_NTT(__asm_ifnt_257,fnt_invntt_tomont)
+MAKE_BENCH_NTT(__asm_ifnt_257_opt_m7,fnt_invntt_tomont_opt_m7)
+
+MAKE_BENCH_POINTMUL(__asm_point_mul_257_16,fnt_point_mul)
+MAKE_BENCH_POINTMUL(__asm_point_mul_257_16_opt_m7,fnt_point_mul_opt_m7)
+
+MAKE_BENCH_ASYM_MUL(__asm_asymmetric_mul_257_16,fnt_asymmetric_mul)
+MAKE_BENCH_ASYM_MUL(__asm_asymmetric_mul_257_16_opt_m7,fnt_asymmetric_mul_opt_m7)
+
+// Q=769 polynomial multiplication
+MAKE_BENCH_NTT_16(small_ntt_asm_769,small_ntt)
+MAKE_BENCH_NTT_16(small_ntt_asm_769_opt_m7,small_ntt_opt_m7)
+
+MAKE_BENCH_NTT_16(small_invntt_asm_769,small_invntt_tomont)
+MAKE_BENCH_NTT_16(small_invntt_asm_769_opt_m7,small_invntt_tomont_opt_m7)
+
+MAKE_BENCH_POINTMUL_16(small_pointmul_asm_769,small_point_mul)
+MAKE_BENCH_POINTMUL_16(small_pointmul_asm_769_opt_m7,small_point_mul_opt_m7)
+
+MAKE_BENCH_ASYM_MUL_16(small_asymmetric_mul_asm_769,small_asymmetric_mul_asm_769)
+MAKE_BENCH_ASYM_MUL_16(small_asymmetric_mul_asm_769_opt_m7,small_asymmetric_mul_asm_769_opt_m7)
+
+// other arithmetic
+MAKE_BENCH_NTT(pqcrystals_dilithium_asm_reduce32,pqcrystals_dilithium_asm_reduce32)
+MAKE_BENCH_NTT(pqcrystals_dilithium_asm_reduce32_opt_m7,pqcrystals_dilithium_asm_reduce32_opt_m7)
+
+MAKE_BENCH_NTT(pqcrystals_dilithium_asm_caddq,pqcrystals_dilithium_asm_caddq)
+MAKE_BENCH_NTT(pqcrystals_dilithium_asm_caddq_opt_m7,pqcrystals_dilithium_asm_caddq_opt_m7)
+
+int main(void)
+{
+    int ret = 0;
+    debug_test_start( "\nDilithium All Test!\n" );
+
+    /* Test cases: the first test that reports non-zero makes main() return 1 */
+
+    // Q=8380417 polynomial multiplication
+    if( test_ntt_pqm4() != 0 ){return( 1 );}
+    if( test_ntt_pqm4_opt() != 0 ){return( 1 );}
+
+    if( test_intt_pqm4() != 0 ){return( 1 );}
+    if( test_intt_pqm4_opt() != 0 ){return( 1 );}
+
+    if( test_pointwise_montgomery_pqm4() != 0 ){ return( 1 ); }
+    if( test_pointwise_montgomery_pqm4_opt() != 0 ){ return( 1 ); }
+
+    if( test_pointwise_acc_montgomery_pqm4() != 0 ){ return( 1 ); }
+    if( test_pointwise_acc_montgomery_pqm4_opt() != 0 ){ return( 1 ); }
+
+    // Q=257 polynomial multiplication
+    if( test_fnt_257_pqm4() != 0 ){return( 1 );}
+    if( test_fnt_257_pqm4_opt() != 0 ){return( 1 );}
+
+    if( test_ifnt_257_pqm4() != 0 ){return( 1 );}
+    if( test_ifnt_257_pqm4_opt() != 0 ){return( 1 );}
+
+    if( test_point_mul_257_16_pqm4() != 0 ){return( 1 );}
+    if( test_point_mul_257_16_pqm4_opt() != 0 ){return( 1 );}
+
+    if( test_asymmetric_mul_257_16_pqm4() != 0 ){return( 1 );}
+    if( test_asymmetric_mul_257_16_pqm4_opt() != 0 ){return( 1 );}
+
+    // Q=769 polynomial multiplication
+    if( test_ntt_769_pqm4() != 0 ){return( 1 );}
+    if( test_ntt_769_pqm4_opt() != 0 ){return( 1 );}
+
+    if( test_intt_769_pqm4() != 0 ){return( 1 );}
+    if( test_intt_769_pqm4_opt() != 0 ){return( 1 );}
+
+    if( test_pointmul_769_pqm4() != 0 ){ return( 1 ); }
+    if( test_pointmul_769_pqm4_opt() != 0 ){ return( 1 ); }
+
+    if( test_asymmetric_mul_769_pqm4() != 0 ){ return( 1 ); }
+    if( test_asymmetric_mul_769_pqm4_opt() != 0 ){ return( 1 ); }
+
+    // other arithmetic
+    if( test_reduce32_pqm4() != 0 ){ return( 1 ); }
+    if( test_reduce32_pqm4_opt() != 0 ){ return( 1 ); }
+
+    if( test_caddq_pqm4() != 0 ){return( 1 );}
+    if( test_caddq_pqm4_opt() != 0 ){return( 1 );}
+
+
+    /* Benchmarks: only reached when every test above passed; return values are ignored */
+
+    // Q=8380417 polynomial multiplication
+    bench_pqcrystals_dilithium_ntt();
+    bench_pqcrystals_dilithium_ntt_opt_m7();
+
+    bench_pqcrystals_dilithium_invntt_tomont();
+    bench_pqcrystals_dilithium_invntt_tomont_opt_m7();
+
+    bench_pqcrystals_dilithium_asm_pointwise_montgomery();
+    bench_pqcrystals_dilithium_asm_pointwise_montgomery_opt_m7();
+
+    bench_pqcrystals_dilithium_asm_pointwise_acc_montgomery();
+    bench_pqcrystals_dilithium_asm_pointwise_acc_montgomery_opt_m7();
+
+    // Q=257 polynomial multiplication
+    bench___asm_fnt_257();
+    bench___asm_fnt_257_opt_m7();
+
+    bench___asm_ifnt_257();
+    bench___asm_ifnt_257_opt_m7();
+
+    bench___asm_point_mul_257_16();
+    bench___asm_point_mul_257_16_opt_m7();
+
+    bench___asm_asymmetric_mul_257_16();
+    bench___asm_asymmetric_mul_257_16_opt_m7();
+
+    // Q=769 polynomial multiplication
+    bench_small_ntt_asm_769();
+    bench_small_ntt_asm_769_opt_m7();
+
+    bench_small_invntt_asm_769();
+    bench_small_invntt_asm_769_opt_m7();
+
+    bench_small_pointmul_asm_769();
+    bench_small_pointmul_asm_769_opt_m7();
+
+    bench_small_asymmetric_mul_asm_769();
+    bench_small_asymmetric_mul_asm_769_opt_m7();
+
+    // other arithmetic
+    bench_pqcrystals_dilithium_asm_reduce32();
+    bench_pqcrystals_dilithium_asm_reduce32_opt_m7();
+
+    bench_pqcrystals_dilithium_asm_caddq();
+    bench_pqcrystals_dilithium_asm_caddq_opt_m7();
+
+    debug_printf( "Done!\n" );
+
+    debug_printf("======================" );
+    dump_benchmarks_tex();
+    debug_printf("======================\n" );
+
+    debug_printf( "ALL GOOD!\n" );
+    return( ret );
+}
diff --git a/tests/dilithium-armv7m/ntt_769.h b/tests/dilithium-armv7m/ntt_769.h
new file mode 100644
index 0000000..8d66a75
--- /dev/null
+++ b/tests/dilithium-armv7m/ntt_769.h
@@ -0,0 +1,64 @@
+#ifndef NTT_769_H
+#define NTT_769_H
+
+#include <stdint.h>
+#define SMALL_Q 769
+#define N 256
+
+static const int32_t zetas_769[64] __attribute__((aligned(8))) = { // NOTE(review): many entries exceed INT32_MAX; storing them as int32_t relies on implementation-defined conversion
+    3138844760, 1334846793, 999738812, 1854264165, 1681125041, 1150537404, 2820492178, 3071823164, 726067294, 2066499220, 3272887953, 1055590142, 4255871365, 1871019564, 2731130050, 1826338500, 513832239, 1792827701, 3373420347, 2993631302, 1161707670, 3306398751, 3518633806, 3406931146, 1586177780, 3853741788, 3317569017, 3825816122, 971813147, 122872927, 217820188, 619949766, 3753209393, 770748358, 4099487641, 765163225, 3630336467, 1742561504, 3479537875, 982983413, 2809321912, 2379266669, 703726762, 681386230, 4110657907, 1457719720, 1217559000, 2474213930, 1195218468, 1089100940, 564098436, 614364633, 3635921600, 2088839752, 3702943196, 1949211426, 2569161192, 374203913, 3982199847, 2083254619, 1513571050, 3647091866, 413299844, 4149753838};
+
+static const int32_t zetas_asm_769[128] __attribute__((aligned(8))) = {
+    346278248, 223405321, 966228013, 759578091, -150798592, 318352582, -1736976371, 1697880440, -2105595150, -804259156, 1675539907, -1016494210, 1401868389, -2005062756, 240160720, 474736307, -1200803600, -1435379187, -1156122536, 1334846793, 999738811, 1854264164,
-631120032, -787503756, -1580592646, 1681125040, 1150537403, -1474475119, -1223144132, 1809583100, -100532394, -1938041160, 726067293, 2066499219, -1022079344, 1055590142, 525002504, 273671518, -212235055, -39095931, 1871019563, -1563837247, 1826338499, 139628326, 27925665, 1731391238, 513832238, 1792827701, -921546949, -1301335995, 67021596, 1117026605, 536172770, 1161707669, -988568545, -776333490, -888036151, 1290165729, -497076839, -753992958, 1586177779, -441225509, -977398279, -469151174, -1614103444, 1591762912, -94947261, 971813146, 122872927, 217820188, 619949766, -1709050706, 1010909077, -1748146637, -541757903, 770748357, -195479656, 765163224, 1413038655, 1781657435, -1206388733, -664630830, 1742561504, -815429422, 982983412, 357448514, 44681064, -1524741316, -1485645385, -1915700627, 703726761, 681386229, 686971362, 1787242568, -860110486, -184309390, 1457719719, 1217558999, -1820753366, -502661972, -1921285760, 1139367137, 1195218467, 1089100940, 564098435, 614364633, -1100271206, 457980908, -1669954774, -659045697, 2088839751, -592024101, 1949211426, 1368357591, 698141628, 335107981, -1725806105, 374203913, -312767449, 2083254618, -1061175275, -2139105948, 519417371, 1513571050, -647875431, 413299844, -145213459, 0}; + +// INTT with CT butterfly +static const int32_t zetas_inv_asm_769[256] __attribute__((aligned(8))) = { + 5585134, 5585134, -346278248, 5585134, -966228013, -346278248, -223405321, 5585134, 1736976371, -966228013, 150798592, -346278248, -318352582, -223405321, -759578091, + // removed first "2285" + LAYER 3+2+1 - 1 - butterfly + 5585134, -346278248, 5585134, -966228013, -346278248, -223405321, 636705165, 446810642, 1519156183, 11170266, -821014555, -1932456027, 301597183, -692556495, -240160720, 1061175275, -1368357591, -519417371, -335107981, 2139105948, -698141628, -625534899, -1267825197, 843355087, 290426917, 128458060, 1295750862, -748407825, -826599688, 1736976371, -240160720, 2005062756, 1061175275, 1100271206, -1368357591, 
502661972, 915961816, 1396283256, 452395775, -1038834743, -955057747, -670215963, 2016233022, -16755399, -1675539907, 1614103444, -1290165729, 94947261, 753992958, -1591762912, 497076839, -1954796559, 1943626293, -1122611738, -1239899531, 938302348, -245745853, 882451018, -435640376, -966228013, 1736976371, -318352582, -240160720, -1401868389, 2005062756, 1016494210, 714897027, -1005323944, 876865885, 2122350549, -1373942724, -2094424884, 1468889985, 1558252114, -1401868389, -686971362, -357448514, 860110486, 1524741316, -1787242568, -44681064, 1407453522, -368618780, 1323676527, -653460564, -1362772458, 1379527857, -463566041, 1859849297, 150798592, -1675539907, 804259156, 1614103444, -67021596, -1290165729, -139628326, -2060914086, -994153678, 55851330, 189894523, -1072345541, 1507985917, 832184821, 1111441472, 2105595150, -525002504, -1809583100, 212235055, 1938041160, -273671518, 100532394, -2044158687, -78191862, 1452134586, 642290298, -2111180283, 552928169, 161968858, -1167292802, -346278248, -966228013, -223405321, 1736976371, 150798592, -318352582, -759578091, -1608518311, -2032988421, -899206417, -480321440, 943887481, 1491230518, -83776995, -284841784, 2005062756, 1100271206, 502661972, 1669954774, -1139367137, -457980908, 1921285760, 1128196871, -1318091394, -1904530361, 396544445, -1228729265, 117287794, 2116765416, 1184048201, -318352582, -1401868389, 1016494210, -686971362, -1413038655, -357448514, 1709050706, -731652426, 89362128, 2021818155, 1720220972, -1882189829, -1245484665, -798674023, 720482160, 804259156, -67021596, -139628326, -536172770, -1731391238, -1117026605, -27925665, -1843093898, -1971551958, 1027664477, 1776072302, -1692295306, 1977137091, 709311894, 1552666981, -223405321, 150798592, -759578091, -1675539907, 2105595150, 804259156, -1697880440, -675801096, 279256651, 949472614, -1066760408, -1050005009, -134043193, 1262240064, 1714635839, 1016494210, -1413038655, 1709050706, 1206388733, 1748146637, -1781657435, -1010909077, 
-390959312, -1329261660, -1083515807, -1965966825, -1530326449, 809844289, -1541496715, 1630858843, -759578091, 2105595150, -1697880440, -525002504, 631120032, -1809583100, -474736307, -1575007513, -201064789, 1893360095, 424470110, -1133782004, -418884977, -1424208921, -547343036, -1697880440, 631120032, -474736307, 1580592646, 1435379187, 787503756, 1200803600, 1999477623, -932717215, 1982722224, -1848679031, 586438968, 1993892490, 1625273710, -1346017059, 0};
+
+// Q1=769: assembly kernels (defined outside this header)
+void small_ntt_asm_769(int16_t a[N], const int32_t *zetas);
+void small_invntt_asm_769(int16_t a[N], const int32_t *zetas);
+void small_pointmul_asm_769(int16_t out[N], const int16_t in[N], const int32_t *zetas);
+void small_asymmetric_mul_asm_769(int16_t c[N], const int16_t a[N], const int16_t b[N], const int16_t b_prime[N]);
+
+// Small-NTT wrappers for computing cs0 and cs1 (modulus 769 by default); static inline so the header can be included from several translation units.
+static inline void small_ntt(int16_t *a) {
+    small_ntt_asm_769(a, zetas_asm_769);
+}
+static inline void small_invntt_tomont(int16_t *a) {
+    small_invntt_asm_769(a, zetas_inv_asm_769);
+}
+static inline void small_point_mul(int16_t *out, int16_t *in) {
+    small_pointmul_asm_769(out, in, zetas_769);
+}
+
+static inline void small_asymmetric_mul(int16_t *c, int16_t *a, int16_t *b, int16_t *b_prime) {
+    small_asymmetric_mul_asm_769(c, a, b, b_prime);
+}
+
+
+void small_ntt_asm_769_opt_m7(int16_t a[N], const int32_t *zetas);
+void small_invntt_asm_769_opt_m7(int16_t a[N], const int32_t *zetas);
+void small_pointmul_asm_769_opt_m7(int16_t out[N], const int16_t in[N], const int32_t *zetas);
+void small_asymmetric_mul_asm_769_opt_m7(int16_t c[N], const int16_t a[N], const int16_t b[N], const int16_t b_prime[N]);
+
+static inline void small_ntt_opt_m7(int16_t *a) {
+    small_ntt_asm_769_opt_m7(a, zetas_asm_769);
+}
+
+static inline void small_invntt_tomont_opt_m7(int16_t *a) {
+    small_invntt_asm_769_opt_m7(a, zetas_inv_asm_769);
+}
+
+static inline void small_point_mul_opt_m7(int16_t *out, int16_t *in) {
+    small_pointmul_asm_769_opt_m7(out, in, zetas_769);
+}
+
+
+static inline void small_asymmetric_mul_opt_m7(int16_t *c, int16_t *a, int16_t *b, int16_t *b_prime) {
+    small_asymmetric_mul_asm_769_opt_m7(c, a, b, b_prime);
+}
+
+#endif
diff --git a/tests/dilithium-armv7m/ref.c b/tests/dilithium-armv7m/ref.c
new file mode 100644
index 0000000..76d9f40
--- /dev/null
+++ b/tests/dilithium-armv7m/ref.c
@@ -0,0 +1,109 @@
+#include "ref.h"
+
+#define MONT (-4186625) // 2^32 % Q
+#define QINV 58728449 // q^(-1) mod 2^32
+
+static int32_t montgomery_reduce(int64_t a) { // Montgomery reduction: returns r with r * 2^32 == a (mod Q)
+    int32_t t;
+
+    t = (int32_t)((uint64_t)a * (uint64_t)QINV); // low 32 bits of a * Q^-1, computed unsigned so the product wraps
+    t = (a - (int64_t)t * Q) >> 32; // a - t*Q is a multiple of 2^32, so the shift divides exactly
+    return t;
+}
+
+
+
+static const int32_t zetas[N] = {
+    0, 25847, -2608894, -518909, 237124, -777960, -876248, 466468,
+    1826347, 2353451, -359251, -2091905, 3119733, -2884855, 3111497, 2680103,
+    2725464, 1024112, -1079900, 3585928, -549488, -1119584, 2619752, -2108549,
+    -2118186, -3859737, -1399561, -3277672, 1757237, -19422, 4010497, 280005,
+    2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439,
+    -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299,
+    -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596,
+    811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779,
+    -3930395, -1528703, -3677745, -3041255, -1452451, 3475950, 2176455, -1585221,
+    -1257611, 1939314, -4083598, -1000202, -3190144, -3157330, -3632928, 126922,
+    3412210, -983419, 2147896, 2715295, -2967645, -3693493, -411027, -2477047,
+    -671102, -1228525, -22981, -1308169, -381987, 1349076, 1852771, -1430430,
+    -3343383, 264944, 508951, 3097992, 44288, -1100098, 904516, 3958618,
+    -3724342, -8578, 1653064, -3249728, 2389356, -210977, 759969, -1316856,
+    189548, -3553272, 3159746, -1851402, -2409325, -177440, 1315589, 1341330,
+    1285669, -1584928, -812732, -1439742, -3019102, -3881060, -3628969, 3839961,
+    2091667, 3407706, 2316500, 3817976, -3342478, 2244091, -2446433, -3562462,
+    266997, 2434439, -1235728, 3513181, -3520352, -3759364, -1197226, -3193378,
+    900702, 1859098, 909542,
819034, 495491, -1613174, -43260, -522500,
+    -655327, -3122442, 2031748, 3207046, -3556995, -525098, -768622, -3595838,
+    342297, 286988, -2437823, 4108315, 3437287, -3342277, 1735879, 203044,
+    2842341, 2691481, -2590150, 1265009, 4055324, 1247620, 2486353, 1595974,
+    -3767016, 1250494, 2635921, -3548272, -2994039, 1869119, 1903435, -1050970,
+    -1333058, 1237275, -3318210, -1430225, -451100, 1312455, 3306115, -1962642,
+    -1279661, 1917081, -2546312, -1374803, 1500165, 777191, 2235880, 3406031,
+    -542412, -2831860, -1671176, -1846953, -2584293, -3724270, 594136, -3776993,
+    -2013608, 2432395, 2454455, -164721, 1957272, 3369112, 185531, -1207385,
+    -3183426, 162844, 1616392, 3014001, 810149, 1652634, -3694233, -1799107,
+    -3038916, 3523897, 3866901, 269760, 2213111, -975884, 1717735, 472078,
+    -426683, 1723600, -1803090, 1910376, -1667432, -1104333, -260646, -3833893,
+    -2939036, -2235985, -420899, -2286327, 183443, -976891, 1612842, -3545687,
+    -554416, 3919660, -48306, -1362209, 3937738, 1400424, -846154, 1976782
+};
+
+/*************************************************
+* Name:        ntt_ref
+*
+* Description: Forward NTT, in-place. No modular reduction is performed after
+*              additions or subtractions. Output vector is in bitreversed order.
+*
+* Arguments:   - int32_t a[N]: input/output coefficient array
+**************************************************/
+void ntt_ref(int32_t a[N]) {
+    unsigned int len, start, j, k;
+    int32_t zeta, t;
+
+    k = 0;
+    for (len = 128; len > 0; len >>= 1) {
+        for (start = 0; start < N; start = j + len) { // j == start + len here, so start advances by 2*len
+            zeta = zetas[++k]; // pre-increment: zetas[0] is never read, k runs 1..255
+            for (j = start; j < start + len; ++j) {
+                t = montgomery_reduce((int64_t)zeta * a[j + len]);
+                a[j + len] = a[j] - t;
+                a[j] = a[j] + t;
+            }
+        }
+    }
+}
+
+
+/*************************************************
+* Name:        invntt_tomont_ref
+*
+* Description: Inverse NTT and multiplication by Montgomery factor 2^32.
+*              In-place. No modular reductions after additions or
+*              subtractions; input coefficients need to be smaller than
+*              Q in absolute value. Output coefficients are smaller than Q in
+*              absolute value.
+*
+* Arguments:   - int32_t a[N]: input/output coefficient array
+**************************************************/
+void invntt_tomont_ref(int32_t a[N]) {
+    unsigned int start, len, j, k;
+    int32_t t, zeta;
+    const int32_t f = 41978; // mont^2/256
+
+    k = 256;
+    for (len = 1; len < N; len <<= 1) {
+        for (start = 0; start < N; start = j + len) { // j == start + len here, so start advances by 2*len
+            zeta = -zetas[--k]; // walks the table backwards (255 down to 1) with negated twiddles
+            for (j = start; j < start + len; ++j) {
+                t = a[j];
+                a[j] = t + a[j + len];
+                a[j + len] = t - a[j + len];
+                a[j + len] = montgomery_reduce((int64_t)zeta * a[j + len]);
+            }
+        }
+    }
+
+    for (j = 0; j < N; ++j) {
+        a[j] = montgomery_reduce((int64_t)f * a[j]);
+    }
+}
diff --git a/tests/dilithium-armv7m/ref.h b/tests/dilithium-armv7m/ref.h
new file mode 100644
index 0000000..22af658
--- /dev/null
+++ b/tests/dilithium-armv7m/ref.h
@@ -0,0 +1,13 @@
+#ifndef REF_H
+#define REF_H
+
+#include <stdint.h>
+
+#define Q 8380417
+#define N 256
+
+
+void ntt_ref(int32_t a[N]);
+void invntt_tomont_ref(int32_t a[N]);
+
+#endif