From 8b81f38e239b24be2b2dc9d89cf6a9fc2be8b5aa Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Fri, 14 Jun 2024 11:28:52 +0900 Subject: [PATCH 01/12] add getBitSize() --- include/mcl/util.hpp | 21 +++++++++++++-------- include/mcl/vint.hpp | 5 ++--- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/include/mcl/util.hpp b/include/mcl/util.hpp index 6ec62b52..eea1c86a 100644 --- a/include/mcl/util.hpp +++ b/include/mcl/util.hpp @@ -61,6 +61,18 @@ T getUnitAt(const T *x, size_t xN, size_t bitPos) return (x[q] >> r) | (x[q + 1] << (TbitSize - r)); } +template +size_t getBitSize(const T *x, size_t n) +{ + while (n > 0 && (x[n - 1] == 0)) { + n--; + } + if (n == 0) { + return 0; + } + return (n - 1) * sizeof(T) * 8 + 1 + cybozu::bsr(x[n - 1]); +} + template class BitIterator { const T *x_; @@ -78,14 +90,7 @@ class BitIterator { { x_ = x; bitPos_ = 0; - while (n > 0 && (x[n - 1] == 0)) { - n--; - } - if (n == 0) { - bitSize_ = 0; - return; - } - bitSize_ = (n - 1) * sizeof(T) * 8 + 1 + cybozu::bsr(x_[n - 1]); + bitSize_ = mcl::fp::getBitSize(x, n); } size_t getBitSize() const { return bitSize_; } bool hasNext() const { return bitPos_ < bitSize_; } diff --git a/include/mcl/vint.hpp b/include/mcl/vint.hpp index de1481e8..e26e0b3a 100644 --- a/include/mcl/vint.hpp +++ b/include/mcl/vint.hpp @@ -429,9 +429,8 @@ class Vint { { if (isZero()) return 1; size_t n = size(); - Unit v = buf_[n - 1]; - assert(v); - return (n - 1) * sizeof(Unit) * 8 + 1 + cybozu::bsr(v); + assert(buf_[n-1]); + return mcl::fp::getBitSize(buf_, n); } // ignore sign bool testBit(size_t i) const From b7554752f0a5d26c84f5f3cdcd582b83aeee8856 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 17 Jun 2024 16:30:46 +0900 Subject: [PATCH 02/12] [misc] add small modp --- misc/mul-approx.py | 139 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 misc/mul-approx.py diff --git a/misc/mul-approx.py b/misc/mul-approx.py new file mode 100644 index 00000000..c6ea1f75 --- /dev/null +++ b/misc/mul-approx.py @@ -0,0 +1,139 @@ +""" +how to find the quotient of p/x. + +# Notation +d = 26 # half of the double-precision bit length (52) +l = p.bit_length(), then 2**(l-1) <= p < 2**l +a = x.bit_length(), then 2**(a-1) <= x < 2**a + +assume d < l <= a <= l + d - 3 +e = a - l + 1, then e <= d - 2 +assume max(e) = 9 + +# Preparation +(p0, p1) = divmod(2**(d+l-1), p) +# 2**(d+l-1) = p0 * p + p1, p1 < p + +# Quotient +input : x < 2**(l+e-1) +a = x.bit_length() +(x0, x1) = divmod(x, 2**(a-d)) +# x = x0 * 2**(a-d) + x1, x1 < 2**(a-d) + +s = 2 * d - e +S = 2**s + +Q'=(x0 * p0) >> s +# (Q', R') = divmod(x0 * p0, S), x0 * p0 = Q' S + R', R' < S + +(Q, R) = divmod(x, p), x = Q * p + R, R < p + +# Theorem +0 <= Q - Q' <= 1 + +S p (Q - Q') = S (p Q) - p (S Q') = S(x - R) - p(x0 p0 - R') + = S(x0 * 2**(a-d) + x1 - R) - x0 p p0 + p R' + = 2**(2 d - e + a - d) x0 + S x1 - S R - x0 (2**(d+l-1) - p1)) + p R' + = 2**(d + l -1) x0 + S x1 - S R - x0 2**(d+l-1) + x0 p1 + p R' + = S x1 + p1 x0 + p R' - S R + +Q - Q' = (x1/p) + (p1/p)(x0/S) + (R'/S) - (R/p) + +0 <= x1/p < 2**(a-d)/2**(l-1) = 1/2**(d-(a-l+1))=1/2**(d-e) <= 1/4 +0 <= p1/p < 1 +0 <= x0/S < 2**d / 2**(2d-e) = 1/2**(d-e) <= 1/4 +0 <= R'/S < 1 +0 <= R/p < 1 + +-1 < Q - Q' < 1+1/2 +""" +class ApproxMul: + def __init__(self, p, d): + self.p = p + self.d = d + self.l = p.bit_length() + t = 1<<(d+self.l-1) + (q, r) = divmod(t, self.p) + self.p0 = q + self.p1 = r + + def __str__(self): + return f'''p={self.p} +d={self.d} +l={self.l} +p0={self.p0} +p1={self.p1}''' + + def getTop(self, x): + """ + return (x0, x1) such that x = x0 * 2**(a-d) + x1 where a = x.bit_length() + """ + if x < self.p: + return (0, x) + a = x.bit_length() + t = 1<<(a-self.d) + return divmod(x, t) + + def quot(self, x): + (x0, x1) = self.getTop(x) + a = x.bit_length() + s= 2*self.d -(a - self.l + 1) + return (x0 * self.p0) >> s + + def check(self, x): + (x0, x1) = self.getTop(x) + a = x.bit_length() + (Q, R) = divmod(x, self.p) + S = 1<<(2*self.d - (a - self.l + 1)) + (Q2, R2) = divmod(x0 * self.p0, S) + lhs = S * self.p * (Q - Q2) + rhs = S * x1 + self.p1 * x0 - S * R + self.p * R2 + if Q == Q2 or Q == Q2 + 1: + return + if Q != Q2: + print('rare case') + print(f'{x=}') + print(f'{x0=}') + print(f'{x1=}') + print(f'{Q=}') + print(f'{R=}') + print(f'{Q2=}') + print(f'{R2=}') + if lhs != rhs: + print(f'check err {x=}') + print(f'{Q=} {R=}') + print(f'{Q2=} {R2=}') + print(f'{lhs=} {rhs=} {lhs==rhs=}') + ERR + + + +def test(p): + print(f'test {p=}') + import random + app = ApproxMul(p, 26) + print(app) + + MAX = p * 256 + random.seed(a=12345) + + app.check(715409340372908097786544094000490505679080949411292527675476747206857849744375764344129765863746114129605942739419060) + ERR + for i in range(0, 100): + x = p * i + app.check(x) + x = p * i + p-1 + app.check(x) + + for i in range(1000000): + x = random.randint(p, p*2) *random.randint(1, 256) + app.check(x) + +def __main__(): + r = 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001 + p = 0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab + test(p) + test(r) + + +__main__() From 68ad17b0c8c8c6b49281568f05d0746422f4ad5e Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 17 Jun 2024 16:52:03 +0900 Subject: [PATCH 03/12] add SmallModP --- src/low_func.hpp | 65 ++++++++++++++++++++++++++++++++++++++++++ test/low_func_test.cpp | 62 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/src/low_func.hpp b/src/low_func.hpp index 6f4ffda9..d7e29dfe 100644 --- a/src/low_func.hpp +++ b/src/low_func.hpp @@ -288,6 +288,71 @@ static void sqrModT(Unit *y, const Unit *x, const Unit *p) fpDblModT(y, xx, p); } +template +struct SmallModP { + const size_t maxE_ = d - 2; + const Unit *p_; + const size_t l_; + uint32_t p0_; + + // p must not be temporary. + explicit SmallModP(const Unit *p) + : p_(p) + , l_(getBitSize(p, N)) + { + Unit t[N+1] = {}; + size_t pos = d + l_ - 1; + { + size_t q = pos / MCL_UNIT_BIT_SIZE; + size_t r = pos % MCL_UNIT_BIT_SIZE; + t[q] = Unit(1) << r; + } + // p0 = 2**(d+l-1)/p + Unit q[2]; + mcl::bint::div(q, 2, t, N+1, p, N); + assert(q[1] == 0); + p0_ = uint32_t(q[0]); + } + Unit approx(Unit x0, size_t a) const + { +// uint64_t t = uint64_t(double(x0) * double(p0_)); // for d = 26 + uint32_t t = uint32_t(x0 * p0_); + return Unit(t >> (2 * d + l_ - 1 - a)); + } + // x[xn] %= p + // the effective range of return value is [0, N) + bool quot(Unit *pQ, const Unit *x, size_t xn) const + { + size_t a = getBitSize(x, xn); + if (a < l_) { + *pQ = 0; + return true; + } + size_t e = a - l_ + 1; + if (e > maxE_) return false; + Unit x0 = getUnitAt(x, xn, a - d); + *pQ = approx(x0, a); + return true; + } + // return false if x[0, xn) is large + bool mod(Unit *x, size_t xn) const + { + assert(xn <= N + 1); + Unit Q; + if (!quot(&Q, x, xn)) return false; + if (Q == 0) return true; + Unit t[N+1]; + t[N] = mcl::bint::mulUnitT(t, p_, Q); + mcl::bint::subT(t, x, t); + if (mcl::bint::cmpGeT(t, p_)) { + mcl::bint::subT(x, t, p_); + } else { + mcl::bint::copyT(x, t); + } + return true; + } +}; + } } // mcl::fp #ifdef _MSC_VER diff --git a/test/low_func_test.cpp b/test/low_func_test.cpp index 26b54b65..ec837376 100644 --- a/test/low_func_test.cpp +++ b/test/low_func_test.cpp @@ -2,6 +2,7 @@ #include #include #include "../src/low_func.hpp" +#include #define PUT(x) std::cout << #x "=" << (x) << std::endl; @@ -71,12 +72,58 @@ void testEdge(const mpz_class& p) } } +template +void setAndMod(const mcl::fp::SmallModP& smp, Unit *x) +{ + x[N] = x[0] & 0x3f; + if (!smp.mod(x, N+1)) { + puts("ERR"); + } +} + +template +void testSmallModP(const mpz_class& p) +{ + mcl::fp::SmallModP smp(p.getUnit()); + cybozu::XorShift rg; + mpz_class x; + for (size_t i = 0; i < 1000; i++) { + x.setRand(p, rg); + x += p; + x *= int(rg.get32() % 128) + 1; + Unit Q1, Q2; + Q1 = (x / p).getLow32bit(); + bool b; + b = smp.quot(&Q2, x.getUnit(), x.getUnitSize()); + CYBOZU_TEST_ASSERT(b); + if (b) { + CYBOZU_TEST_ASSERT(Q1 == Q2 || Q1 == Q2 + 1); + } + Unit x2[N+1] = {}; + mcl::bint::copyN(x2, x.getUnit(), x.getUnitSize()); + b = smp.mod(x2, x.getUnitSize()); + mpz_class x3 = x % p; + CYBOZU_TEST_ASSERT(b); + CYBOZU_TEST_EQUAL_ARRAY(x2, x3.getUnit(), x3.getUnitSize()); + } +#ifdef NDEBUG + { + if ((smp.p_[N-1] >> (MCL_UNIT_BIT_SIZE - 8)) == 0) return; // top 8-bit must be not zero + Unit x[N+1]; + mcl::gmp::getArray(x, N+1, p); + CYBOZU_BENCH_C("mod", 1000, setAndMod, smp, x); + } +#endif +} + CYBOZU_TEST_AUTO(limit) { + const size_t adj = 8 / sizeof(Unit); std::cout << std::hex; const char *tbl4[] = { "0000000000000001000000000000000000000000000000000000000000000085", // min prime "2523648240000001ba344d80000000086121000000000013a700000000000013", + "73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001", // BLS12-381 r "7523648240000001ba344d80000000086121000000000013a700000000000017", "800000000000000000000000000000000000000000000000000000000000005f", "fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f", // secp256k1 @@ -90,7 +137,20 @@ CYBOZU_TEST_AUTO(limit) printf("p=%s\n", tbl4[i]); mpz_class p; mcl::gmp::setStr(p, tbl4[i], 16); - testEdge<4 * (8 / sizeof(Unit))>(p); + testEdge<4 * adj>(p); + testSmallModP<4 * adj>(p); + } + const char *tbl6[] = { + "1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab", // BLS12-381 p + "240026400f3d82b2e42de125b00158405b710818ac000007e0042f008e3e00000000001080046200000000000000000d", // BN381 r + "240026400f3d82b2e42de125b00158405b710818ac00000840046200950400000000001380052e000000000000000013", // BN381 p + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl6); i++) { + printf("p=%s\n", tbl6[i]); + mpz_class p; + mcl::gmp::setStr(p, tbl6[i], 16); + testEdge<6 * adj>(p); + testSmallModP<6 * adj>(p); } } From 38238342c25713ab705b54814fd1f58d3422d61c Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Thu, 20 Jun 2024 11:35:35 +0900 Subject: [PATCH 04/12] add mod and modT --- src/low_func.hpp | 33 +++++++++++++++++++++++++++------ test/low_func_test.cpp | 42 +++++++++++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 19 deletions(-) diff --git a/src/low_func.hpp b/src/low_func.hpp index d7e29dfe..a20a3bfb 100644 --- a/src/low_func.hpp +++ b/src/low_func.hpp @@ -288,19 +288,22 @@ static void sqrModT(Unit *y, const Unit *x, const Unit *p) fpDblModT(y, xx, p); } -template struct SmallModP { + const size_t d = 16; // d = 26 if use double in approx const size_t maxE_ = d - 2; const Unit *p_; + const size_t n_; const size_t l_; uint32_t p0_; // p must not be temporary. - explicit SmallModP(const Unit *p) + explicit SmallModP(const Unit *p, size_t n) : p_(p) - , l_(getBitSize(p, N)) + , n_(n) + , l_(getBitSize(p, n)) { - Unit t[N+1] = {}; + Unit *t = (Unit*)CYBOZU_ALLOCA((n_+1)*sizeof(Unit)); + mcl::bint::clearN(t, n_+1); size_t pos = d + l_ - 1; { size_t q = pos / MCL_UNIT_BIT_SIZE; @@ -309,7 +312,7 @@ struct SmallModP { } // p0 = 2**(d+l-1)/p Unit q[2]; - mcl::bint::div(q, 2, t, N+1, p, N); + mcl::bint::div(q, 2, t, n_+1, p, n_); assert(q[1] == 0); p0_ = uint32_t(q[0]); } @@ -320,7 +323,7 @@ struct SmallModP { return Unit(t >> (2 * d + l_ - 1 - a)); } // x[xn] %= p - // the effective range of return value is [0, N) + // the effective range of return value is [0, n_) bool quot(Unit *pQ, const Unit *x, size_t xn) const { size_t a = getBitSize(x, xn); @@ -336,6 +339,24 @@ struct SmallModP { } // return false if x[0, xn) is large bool mod(Unit *x, size_t xn) const + { + assert(xn <= n_ + 1); + Unit Q; + if (!quot(&Q, x, xn)) return false; + if (Q == 0) return true; + Unit *t = (Unit*)CYBOZU_ALLOCA((n_+1)*sizeof(Unit)); + t[n_] = mcl::bint::mulUnitN(t, p_, Q, n_); + mcl::bint::subN(t, x, t, n_+1); + if (mcl::bint::cmpGeN(t, p_, n_)) { + mcl::bint::subN(x, t, p_, n_); + } else { + mcl::bint::copyN(x, t, n_); + } + return true; + } + template + // return false if x[0, xn) is large + bool modT(Unit *x, size_t xn) const { assert(xn <= N + 1); Unit Q; diff --git a/test/low_func_test.cpp b/test/low_func_test.cpp index ec837376..05a7e2df 100644 --- a/test/low_func_test.cpp +++ b/test/low_func_test.cpp @@ -73,21 +73,31 @@ void testEdge(const mpz_class& p) } template -void setAndMod(const mcl::fp::SmallModP& smp, Unit *x) +void setAndModT(const mcl::fp::SmallModP& smp, Unit *x, size_t xn) { - x[N] = x[0] & 0x3f; - if (!smp.mod(x, N+1)) { - puts("ERR"); + x[smp.n_] = x[0] & 0x3f; + if (!smp.modT(x, xn)) { + puts("ERR2"); + exit(1); + } +} + +void setAndMod(const mcl::fp::SmallModP& smp, Unit *x, size_t xn) +{ + x[smp.n_] = x[0] & 0x3f; + if (!smp.mod(x, xn)) { + puts("ERR1"); + exit(1); } } template void testSmallModP(const mpz_class& p) { - mcl::fp::SmallModP smp(p.getUnit()); + mcl::fp::SmallModP smp(p.getUnit(), N); cybozu::XorShift rg; mpz_class x; - for (size_t i = 0; i < 1000; i++) { + for (size_t i = 0; i < 10; i++) { x.setRand(p, rg); x += p; x *= int(rg.get32() % 128) + 1; @@ -99,19 +109,25 @@ void testSmallModP(const mpz_class& p) if (b) { CYBOZU_TEST_ASSERT(Q1 == Q2 || Q1 == Q2 + 1); } - Unit x2[N+1] = {}; - mcl::bint::copyN(x2, x.getUnit(), x.getUnitSize()); - b = smp.mod(x2, x.getUnitSize()); - mpz_class x3 = x % p; - CYBOZU_TEST_ASSERT(b); - CYBOZU_TEST_EQUAL_ARRAY(x2, x3.getUnit(), x3.getUnitSize()); + for (int mode = 0; mode < 2; mode++) { + Unit x2[N+1] = {}; + mcl::bint::copyN(x2, x.getUnit(), x.getUnitSize()); + switch (mode) { + case 0: b = smp.mod(x2, x.getUnitSize()); break; + case 1: b = smp.modT(x2, x.getUnitSize()); break; + } + mpz_class x3 = x % p; + CYBOZU_TEST_ASSERT(b); + CYBOZU_TEST_EQUAL_ARRAY(x2, x3.getUnit(), x3.getUnitSize()); + } } #ifdef NDEBUG { if ((smp.p_[N-1] >> (MCL_UNIT_BIT_SIZE - 8)) == 0) return; // top 8-bit must be not zero Unit x[N+1]; mcl::gmp::getArray(x, N+1, p); - CYBOZU_BENCH_C("mod", 1000, setAndMod, smp, x); + CYBOZU_BENCH_C("mod ", 1000, setAndMod, smp, x, N+1); + CYBOZU_BENCH_C("modT", 1000, setAndModT, smp, x, N+1); } #endif } From db5acca070cab7d29c0bf0bd8e0b1c83d42fec64 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Thu, 20 Jun 2024 18:02:58 +0900 Subject: [PATCH 05/12] mov SmallModP to bint --- include/mcl/bint.hpp | 101 ++++++++++++++++++++++++++++++++++++++++- include/mcl/fp.hpp | 4 ++ include/mcl/op.hpp | 1 + src/fp.cpp | 1 + src/low_func.hpp | 86 ----------------------------------- test/bint_test.cpp | 92 +++++++++++++++++++++++++++++++++++++ test/low_func_test.cpp | 62 ------------------------- 7 files changed, 198 insertions(+), 149 deletions(-) diff --git a/include/mcl/bint.hpp b/include/mcl/bint.hpp index 339265cb..e6ccdd07 100644 --- a/include/mcl/bint.hpp +++ b/include/mcl/bint.hpp @@ -8,7 +8,7 @@ */ #include -#include +#include #include #ifndef MCL_STANDALONE #include @@ -476,5 +476,104 @@ inline Unit getMontgomeryCoeff(Unit pLow, size_t bitSize = sizeof(Unit) * 8) return pp; } +struct SmallModP { + const size_t d = 16; // d = 26 if use double in approx + const size_t maxE_ = d - 2; + const Unit *p_; + size_t n_; + size_t l_; + uint32_t p0_; + + SmallModP() + : n_(0) + , l_(0) + , p0_(0) + { + } + // p must not be temporary. + void init(const Unit *p, size_t n) + { + p_ = p; + n_ = n; + l_ = mcl::fp::getBitSize(p, n); + Unit *t = (Unit*)CYBOZU_ALLOCA((n_+1)*sizeof(Unit)); + mcl::bint::clearN(t, n_+1); + size_t pos = d + l_ - 1; + { + size_t q = pos / MCL_UNIT_BIT_SIZE; + size_t r = pos % MCL_UNIT_BIT_SIZE; + t[q] = Unit(1) << r; + } + // p0 = 2**(d+l-1)/p + Unit q[2]; + mcl::bint::div(q, 2, t, n_+1, p, n_); + assert(q[1] == 0); + p0_ = uint32_t(q[0]); + } + Unit approx(Unit x0, size_t a) const + { +// uint64_t t = uint64_t(double(x0) * double(p0_)); // for d = 26 + uint32_t t = uint32_t(x0 * p0_); + return Unit(t >> (2 * d + l_ - 1 - a)); + } + // x[xn] %= p + // the effective range of return value is [0, n_) + bool quot(Unit *pQ, const Unit *x, size_t xn) const + { + size_t a = mcl::fp::getBitSize(x, xn); + if (a < l_) { + *pQ = 0; + return true; + } + size_t e = a - l_ + 1; + if (e > maxE_) return false; + Unit x0 = mcl::fp::getUnitAt(x, xn, a - d); + *pQ = approx(x0, a); + return true; + } + // return false if x[0, xn) is large + bool mod(Unit *z, const Unit *x, size_t xn) const + { + assert(xn <= n_ + 1); + Unit Q; + if (!quot(&Q, x, xn)) return false; + if (Q == 0) return true; + Unit *t = (Unit*)CYBOZU_ALLOCA((n_+1)*sizeof(Unit)); + t[n_] = mcl::bint::mulUnitN(t, p_, Q, n_); + mcl::bint::subN(t, x, t, n_+1); + if (mcl::bint::cmpGeN(t, p_, n_)) { + mcl::bint::subN(z, t, p_, n_); + } else { + mcl::bint::copyN(z, t, n_); + } + return true; + } + // return false if x[0, xn) is large + template + bool modT(Unit z[N], const Unit *x, size_t xn) const + { + assert(xn <= N + 1); + Unit Q; + if (!quot(&Q, x, xn)) return false; + if (Q == 0) return true; + Unit t[N+1]; + t[N] = mcl::bint::mulUnitT(t, p_, Q); + mcl::bint::subT(t, x, t); + if (mcl::bint::cmpGeT(t, p_)) { + mcl::bint::subT(z, t, p_); + } else { + mcl::bint::copyT(z, t); + } + return true; + } + template + static bool mulUnit(const SmallModP& smp, Unit z[N], const Unit x[N], Unit y) + { + Unit xy[N+1]; + xy[N] = mulUnitT(x, y); + return modT(z, xy, N+1); + } +}; + } } // mcl::bint diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index 3d03c1e8..3aa7ea06 100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -625,6 +625,10 @@ class FpT : public fp::Serializable, if (mulSmallUnit(z, x, y)) return; op_.fp_mulUnit(z.v_, x.v_, y, op_.p); } + static inline void mulUnit2(FpT& z, const FpT& x, const Unit y) + { + op_.fp_mulUnit(z.v_, x.v_, y, op_.p); + } static inline void inv(FpT& y, const FpT& x) { assert(!x.isZero()); diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index abc3ffa8..f1d63768 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -186,6 +186,7 @@ struct Op { CYBOZU_ALIGN(8) char im[sizeof(mcl::inv::InvModT)]; mcl::Modp modp; mcl::SmallModp smallModp; + mcl::bint::SmallModP smallModP; Unit half[maxUnitSize]; // (p + 1) / 2 Unit oneRep[maxUnitSize]; // 1(=inv R if Montgomery) /* diff --git a/src/fp.cpp b/src/fp.cpp index 650f8954..c72d7c77 100644 --- a/src/fp.cpp +++ b/src/fp.cpp @@ -581,6 +581,7 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size } modp.init(mp); smallModp.init(mp); + smallModP.init(p, N); return fp::initForMont(*this, p, mode); } diff --git a/src/low_func.hpp b/src/low_func.hpp index a20a3bfb..6f4ffda9 100644 --- a/src/low_func.hpp +++ b/src/low_func.hpp @@ -288,92 +288,6 @@ static void sqrModT(Unit *y, const Unit *x, const Unit *p) fpDblModT(y, xx, p); } -struct SmallModP { - const size_t d = 16; // d = 26 if use double in approx - const size_t maxE_ = d - 2; - const Unit *p_; - const size_t n_; - const size_t l_; - uint32_t p0_; - - // p must not be temporary. - explicit SmallModP(const Unit *p, size_t n) - : p_(p) - , n_(n) - , l_(getBitSize(p, n)) - { - Unit *t = (Unit*)CYBOZU_ALLOCA((n_+1)*sizeof(Unit)); - mcl::bint::clearN(t, n_+1); - size_t pos = d + l_ - 1; - { - size_t q = pos / MCL_UNIT_BIT_SIZE; - size_t r = pos % MCL_UNIT_BIT_SIZE; - t[q] = Unit(1) << r; - } - // p0 = 2**(d+l-1)/p - Unit q[2]; - mcl::bint::div(q, 2, t, n_+1, p, n_); - assert(q[1] == 0); - p0_ = uint32_t(q[0]); - } - Unit approx(Unit x0, size_t a) const - { -// uint64_t t = uint64_t(double(x0) * double(p0_)); // for d = 26 - uint32_t t = uint32_t(x0 * p0_); - return Unit(t >> (2 * d + l_ - 1 - a)); - } - // x[xn] %= p - // the effective range of return value is [0, n_) - bool quot(Unit *pQ, const Unit *x, size_t xn) const - { - size_t a = getBitSize(x, xn); - if (a < l_) { - *pQ = 0; - return true; - } - size_t e = a - l_ + 1; - if (e > maxE_) return false; - Unit x0 = getUnitAt(x, xn, a - d); - *pQ = approx(x0, a); - return true; - } - // return false if x[0, xn) is large - bool mod(Unit *x, size_t xn) const - { - assert(xn <= n_ + 1); - Unit Q; - if (!quot(&Q, x, xn)) return false; - if (Q == 0) return true; - Unit *t = (Unit*)CYBOZU_ALLOCA((n_+1)*sizeof(Unit)); - t[n_] = mcl::bint::mulUnitN(t, p_, Q, n_); - mcl::bint::subN(t, x, t, n_+1); - if (mcl::bint::cmpGeN(t, p_, n_)) { - mcl::bint::subN(x, t, p_, n_); - } else { - mcl::bint::copyN(x, t, n_); - } - return true; - } - template - // return false if x[0, xn) is large - bool modT(Unit *x, size_t xn) const - { - assert(xn <= N + 1); - Unit Q; - if (!quot(&Q, x, xn)) return false; - if (Q == 0) return true; - Unit t[N+1]; - t[N] = mcl::bint::mulUnitT(t, p_, Q); - mcl::bint::subT(t, x, t); - if (mcl::bint::cmpGeT(t, p_)) { - mcl::bint::subT(x, t, p_); - } else { - mcl::bint::copyT(x, t); - } - return true; - } -}; - } } // mcl::fp #ifdef _MSC_VER diff --git a/test/bint_test.cpp b/test/bint_test.cpp index 9023765d..111333bd 100644 --- a/test/bint_test.cpp +++ b/test/bint_test.cpp @@ -921,6 +921,98 @@ CYBOZU_TEST_AUTO(sqr) testSqr<9>(); } +template +void setAndModT(const mcl::bint::SmallModP& smp, Unit x[N+1]) +{ + x[N-1] = mcl::bint::mulUnit1(&x[N], x[N-1], x[0] & 0x3f); + size_t xn = x[N] == 0 ? N : N+1; + if (!smp.modT(x, x, xn)) { + puts("ERR2"); + exit(1); + } +} + +template +void setAndMod(const mcl::bint::SmallModP& smp, Unit x[N+1]) +{ + x[N-1] = mcl::bint::mulUnit1(&x[N], x[N-1], x[0] & 0x3f); + size_t xn = x[N] == 0 ? N : N+1; + if (!smp.mod(x, x, xn)) { + puts("ERR1"); + exit(1); + } +} + +template +void testSmallModP(const char *pStr) +{ + printf("p=%s\n", pStr); + Unit p[N]; + const size_t FACTOR = 128; + size_t xn = mcl::fp::hexToArray(p, N, pStr, strlen(pStr)); + CYBOZU_TEST_EQUAL(xn, N); + mcl::bint::SmallModP smp; + smp.init(p, N); + cybozu::XorShift rg; + Unit x[N+1]; + mcl::bint::copyT(x, p); + for (size_t i = 0; i < 10; i++) { + uint32_t a = rg.get32() % FACTOR; + x[N-1] = mcl::bint::mulUnit1(&x[N], x[N-1], a); + size_t xn = x[N] == 0 ? N : N+1; + Unit q[2], r[N+1]; + mcl::bint::copyN(r, x, xn); + mcl::bint::div(q, 2, r, xn, p, N); + CYBOZU_TEST_ASSERT(q[0] <= FACTOR && q[1] == 0); + for (int mode = 0; mode < 2; mode++) { + Unit r2[N]; + bool b = false; + switch (mode) { + case 0: b = smp.mod(r2, x, xn); break; + case 1: b = smp.modT(r2, x, xn); break; + } + CYBOZU_TEST_ASSERT(b); + CYBOZU_TEST_EQUAL_ARRAY(r2, r, N); + } + mcl::bint::copyT(x, r); + } +#ifdef NDEBUG + { + if ((smp.p_[N-1] >> (MCL_UNIT_BIT_SIZE - 8)) == 0) return; // top 8-bit must be not zero + CYBOZU_BENCH_C("mod ", 1000, setAndMod, smp, x); + CYBOZU_BENCH_C("modT", 1000, setAndModT, smp, x); + } +#endif +} + +CYBOZU_TEST_AUTO(SmallModP) +{ + const size_t adj = 8 / sizeof(Unit); + const char *tbl4[] = { + "2523648240000001ba344d80000000086121000000000013a700000000000013", + "73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001", // BLS12-381 r + "7523648240000001ba344d80000000086121000000000013a700000000000017", + "800000000000000000000000000000000000000000000000000000000000005f", + "fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f", // secp256k1 + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff43", // max prime + // not primes + "ffffffffffffffffffffffffffffffffffffffffffffffff0000000000000001", + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffff00000001", + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl4); i++) { + testSmallModP<4 * adj>(tbl4[i]); + } + const char *tbl6[] = { + "1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab", // BLS12-381 p + "240026400f3d82b2e42de125b00158405b710818ac000007e0042f008e3e00000000001080046200000000000000000d", // BN381 r + "240026400f3d82b2e42de125b00158405b710818ac00000840046200950400000000001380052e000000000000000013", // BN381 p + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl6); i++) { + testSmallModP<6 * adj>(tbl6[i]); + } +} + #if 0 template void testMulLow() diff --git a/test/low_func_test.cpp b/test/low_func_test.cpp index 05a7e2df..b9ee62b9 100644 --- a/test/low_func_test.cpp +++ b/test/low_func_test.cpp @@ -72,66 +72,6 @@ void testEdge(const mpz_class& p) } } -template -void setAndModT(const mcl::fp::SmallModP& smp, Unit *x, size_t xn) -{ - x[smp.n_] = x[0] & 0x3f; - if (!smp.modT(x, xn)) { - puts("ERR2"); - exit(1); - } -} - -void setAndMod(const mcl::fp::SmallModP& smp, Unit *x, size_t xn) -{ - x[smp.n_] = x[0] & 0x3f; - if (!smp.mod(x, xn)) { - puts("ERR1"); - exit(1); - } -} - -template -void testSmallModP(const mpz_class& p) -{ - mcl::fp::SmallModP smp(p.getUnit(), N); - cybozu::XorShift rg; - mpz_class x; - for (size_t i = 0; i < 10; i++) { - x.setRand(p, rg); - x += p; - x *= int(rg.get32() % 128) + 1; - Unit Q1, Q2; - Q1 = (x / p).getLow32bit(); - bool b; - b = smp.quot(&Q2, x.getUnit(), x.getUnitSize()); - CYBOZU_TEST_ASSERT(b); - if (b) { - CYBOZU_TEST_ASSERT(Q1 == Q2 || Q1 == Q2 + 1); - } - for (int mode = 0; mode < 2; mode++) { - Unit x2[N+1] = {}; - mcl::bint::copyN(x2, x.getUnit(), x.getUnitSize()); - switch (mode) { - case 0: b = smp.mod(x2, x.getUnitSize()); break; - case 1: b = smp.modT(x2, x.getUnitSize()); break; - } - mpz_class x3 = x % p; - CYBOZU_TEST_ASSERT(b); - CYBOZU_TEST_EQUAL_ARRAY(x2, x3.getUnit(), x3.getUnitSize()); - } - } -#ifdef NDEBUG - { - if ((smp.p_[N-1] >> (MCL_UNIT_BIT_SIZE - 8)) == 0) return; // top 8-bit must be not zero - Unit x[N+1]; - mcl::gmp::getArray(x, N+1, p); - CYBOZU_BENCH_C("mod ", 1000, setAndMod, smp, x, N+1); - CYBOZU_BENCH_C("modT", 1000, setAndModT, smp, x, N+1); - } -#endif -} - CYBOZU_TEST_AUTO(limit) { const size_t adj = 8 / sizeof(Unit); @@ -154,7 +94,6 @@ CYBOZU_TEST_AUTO(limit) mpz_class p; mcl::gmp::setStr(p, tbl4[i], 16); testEdge<4 * adj>(p); - testSmallModP<4 * adj>(p); } const char *tbl6[] = { "1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab", // BLS12-381 p @@ -166,7 +105,6 @@ CYBOZU_TEST_AUTO(limit) mpz_class p; mcl::gmp::setStr(p, tbl6[i], 16); testEdge<6 * adj>(p); - testSmallModP<6 * adj>(p); } } From 13695b643c3b27f72ba494f2b469d434afcae01a Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Fri, 21 Jun 2024 10:54:17 +0900 Subject: [PATCH 06/12] add mulSmallUnit2 temporary --- include/mcl/bint.hpp | 20 ++++++++++++++------ include/mcl/fp.hpp | 1 + include/mcl/op.hpp | 2 ++ src/fp.cpp | 1 + 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/include/mcl/bint.hpp b/include/mcl/bint.hpp index e6ccdd07..812a97c2 100644 --- a/include/mcl/bint.hpp +++ b/include/mcl/bint.hpp @@ -537,10 +537,14 @@ struct SmallModP { assert(xn <= n_ + 1); Unit Q; if (!quot(&Q, x, xn)) return false; - if (Q == 0) return true; + if (Q == 0) { + mcl::bint::copyN(z, x, n_); + return true; + } Unit *t = (Unit*)CYBOZU_ALLOCA((n_+1)*sizeof(Unit)); t[n_] = mcl::bint::mulUnitN(t, p_, Q, n_); - mcl::bint::subN(t, x, t, n_+1); + bool b = mcl::bint::subN(t, x, t, n_+1); + assert(!b); (void)b; if (mcl::bint::cmpGeN(t, p_, n_)) { mcl::bint::subN(z, t, p_, n_); } else { @@ -555,10 +559,14 @@ struct SmallModP { assert(xn <= N + 1); Unit Q; if (!quot(&Q, x, xn)) return false; - if (Q == 0) return true; + if (Q == 0) { + mcl::bint::copyT(z, x); + return true; + } Unit t[N+1]; t[N] = mcl::bint::mulUnitT(t, p_, Q); - mcl::bint::subT(t, x, t); + bool b = mcl::bint::subT(t, x, t); + assert(!b); (void)b; if (mcl::bint::cmpGeT(t, p_)) { mcl::bint::subT(z, t, p_); } else { @@ -570,8 +578,8 @@ struct SmallModP { static bool mulUnit(const SmallModP& smp, Unit z[N], const Unit x[N], Unit y) { Unit xy[N+1]; - xy[N] = mulUnitT(x, y); - return modT(z, xy, N+1); + xy[N] = mulUnitT(xy, x, y); + return smp.modT(z, xy, N+1); } }; diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index 3aa7ea06..2dbb6147 100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -627,6 +627,7 @@ class FpT : public fp::Serializable, } static inline void mulUnit2(FpT& z, const FpT& x, const Unit y) { + if (op_.mulSmallUnit2(op_.smallModP, z.v_, x.v_, y)) return; op_.fp_mulUnit(z.v_, x.v_, y, op_.p); } static inline void inv(FpT& y, const FpT& x) diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index f1d63768..8b7b2608 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -239,6 +239,7 @@ struct Op { void3u fp_mul2; void2uOp fp_invOp; void2uIu fp_mulUnit; // fp_mulUnitPre + bool (*mulSmallUnit2)(const mcl::bint::SmallModP&, Unit *z, const Unit *x, Unit y); void3u fpDbl_mulPre; void2u fpDbl_sqrPre; @@ -329,6 +330,7 @@ struct Op { fp_mul2 = 0; fp_invOp = 0; fp_mulUnit = 0; + mulSmallUnit2 = 0; fpDbl_mulPre = 0; fpDbl_sqrPre = 0; diff --git a/src/fp.cpp b/src/fp.cpp index c72d7c77..f66010ec 100644 --- a/src/fp.cpp +++ b/src/fp.cpp @@ -300,6 +300,7 @@ void setOp(Op& op) op.fp_shr1 = shr1T; op.fp_neg = negT; op.fp_mulUnitPre = mulUnitPreT; + op.mulSmallUnit2 = bint::SmallModP::mulUnit; op.fp_addPre = bint::get_add(N); op.fp_subPre = bint::get_sub(N); op.fpDbl_addPre = bint::get_add(N * 2); From ad798ba59333e65589ac03680fff18c7e12d0c81 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Sat, 22 Jun 2024 06:27:44 +0900 Subject: [PATCH 07/12] compare some algorithms --- include/mcl/bint.hpp | 37 +++++++++++++++++++++++++++++-------- include/mcl/fp.hpp | 1 + 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/include/mcl/bint.hpp b/include/mcl/bint.hpp index 812a97c2..51163d1b 100644 --- a/include/mcl/bint.hpp +++ b/include/mcl/bint.hpp @@ -477,9 +477,11 @@ inline Unit getMontgomeryCoeff(Unit pLow, size_t bitSize = sizeof(Unit) * 8) } struct SmallModP { - const size_t d = 16; // d = 26 if use double in approx - const size_t maxE_ = d - 2; + static const size_t d = 16; // d = 26 if use double in approx + static const size_t MAX_MUL_N = 10; + static const size_t maxE_ = d - 2; const Unit *p_; + Unit tbl_[MAX_MUL_N][MCL_MAX_UNIT_SIZE+1]; size_t n_; size_t l_; uint32_t p0_; @@ -509,6 +511,9 @@ struct SmallModP { mcl::bint::div(q, 2, t, n_+1, p, n_); assert(q[1] == 0); p0_ = uint32_t(q[0]); + for (size_t i = 0; i < MAX_MUL_N; i++) { + tbl_[i][n_] = mcl::bint::mulUnitN(tbl_[i], p_, Unit(i+1), n_); // 1~MAX_MUL_N + } } Unit approx(Unit x0, size_t a) const { @@ -542,16 +547,24 @@ struct SmallModP { return true; } Unit *t = (Unit*)CYBOZU_ALLOCA((n_+1)*sizeof(Unit)); - t[n_] = mcl::bint::mulUnitN(t, p_, Q, n_); - bool b = mcl::bint::subN(t, x, t, n_+1); + const Unit *pQ = 0; + if (Q <= MAX_MUL_N) { + assert(Q > 0); + pQ = tbl_[Q-1]; + } else { + t[n_] = mcl::bint::mulUnitN(t, p_, Q, n_); + pQ = t; + } + bool b = mcl::bint::subN(t, x, pQ, xn); assert(!b); (void)b; - if (mcl::bint::cmpGeN(t, p_, n_)) { + if (mcl::bint::cmpGeN(t, tbl_[0], xn)) { // tbl_[0] == p and tbl_[n_] = 0 mcl::bint::subN(z, t, p_, n_); } else { mcl::bint::copyN(z, t, n_); } return true; } +#if 1 // return false if x[0, xn) is large template bool modT(Unit z[N], const Unit *x, size_t xn) const @@ -564,22 +577,30 @@ struct SmallModP { return true; } Unit t[N+1]; - t[N] = mcl::bint::mulUnitT(t, p_, Q); - bool b = mcl::bint::subT(t, x, t); + const Unit *pQ = 0; + if (false&&Q <= MAX_MUL_N) { + pQ = tbl_[Q-1]; + } else { + t[N] = mcl::bint::mulUnitT(t, p_, Q); + pQ = t; + } + bool b = mcl::bint::subT(t, x, pQ); assert(!b); (void)b; - if (mcl::bint::cmpGeT(t, p_)) { + if (mcl::bint::cmpGeT(t, tbl_[0])) { mcl::bint::subT(z, t, p_); } else { mcl::bint::copyT(z, t); } return true; } +#endif template static bool mulUnit(const SmallModP& smp, Unit z[N], const Unit x[N], Unit y) { Unit xy[N+1]; xy[N] = mulUnitT(xy, x, y); return smp.modT(z, xy, N+1); +// return smp.mod(z, xy, N+1); } }; diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index 2dbb6147..b22da06c 100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -627,6 +627,7 @@ class FpT : public fp::Serializable, } static inline void mulUnit2(FpT& z, const FpT& x, const Unit y) { +// if (mulSmallUnit(z, x, y)) return; if (op_.mulSmallUnit2(op_.smallModP, z.v_, x.v_, y)) return; op_.fp_mulUnit(z.v_, x.v_, y, op_.p); } From bc6b27a3ff01f247f77f631c8a6a0eb480144000 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 24 Jun 2024 12:06:02 +0900 Subject: [PATCH 08/12] rewrite SmallModP --- include/mcl/bint.hpp | 4 ++-- include/mcl/fp.hpp | 22 ++++++++++++++++++---- include/mcl/gmp_util.hpp | 3 ++- include/mcl/op.hpp | 6 +++--- include/mcl/util.hpp | 3 +++ src/fp.cpp | 4 ++-- 6 files changed, 30 insertions(+), 12 deletions(-) diff --git a/include/mcl/bint.hpp b/include/mcl/bint.hpp index 51163d1b..7a601923 100644 --- a/include/mcl/bint.hpp +++ b/include/mcl/bint.hpp @@ -478,7 +478,7 @@ inline Unit getMontgomeryCoeff(Unit pLow, size_t bitSize = sizeof(Unit) * 8) struct SmallModP { static const size_t d = 16; // d = 26 if use double in approx - static const size_t MAX_MUL_N = 10; + static const size_t MAX_MUL_N = 1; // not used because mulSmallUnit is call at first. static const size_t maxE_ = d - 2; const Unit *p_; Unit tbl_[MAX_MUL_N][MCL_MAX_UNIT_SIZE+1]; @@ -578,7 +578,7 @@ struct SmallModP { } Unit t[N+1]; const Unit *pQ = 0; - if (false&&Q <= MAX_MUL_N) { + if (Q <= MAX_MUL_N) { pQ = tbl_[Q-1]; } else { t[N] = mcl::bint::mulUnitT(t, p_, Q); diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index b22da06c..0877bb7f 100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -114,11 +114,14 @@ class FpT : public fp::Serializable, op_.fp_add(y, x, x, op_.p); } #endif +#if 0 static inline void mul9A(Unit *y, const Unit *x) { - mulSmall(y, x, 9); + mulUnit(y, x, 9); // op_.fp_mul9(y, x, op_.p); } +#endif +#if 0 static inline void mulSmall(Unit *z, const Unit *x, const uint32_t y) { assert(y <= op_.smallModp.maxMulN); @@ -129,6 +132,7 @@ class FpT : public fp::Serializable, op_.fp_subPre(z, xy, pv); op_.fp_sub(z, z, op_.p, op_.p); } +#endif public: typedef FpT BaseFp; // return pointer to array v_[] @@ -187,9 +191,11 @@ class FpT : public fp::Serializable, if (op_.fp_mul2A_ == 0) { op_.fp_mul2A_ = mul2A; } +#if 0 if (op_.fp_mul9A_ == 0) { op_.fp_mul9A_ = mul9A; } +#endif #endif *pb = true; } @@ -608,14 +614,19 @@ class FpT : public fp::Serializable, } static void mul9(FpT& y, const FpT& x) { +#if 1 + mulUnit(y, x, 9); +#else #ifdef MCL_XBYAK_DIRECT_CALL op_.fp_mul9A_(y.v_, x.v_); #else mul9A(y.v_, x.v_); +#endif #endif } static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); } static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); } +#if 0 static inline void mulSmall(FpT& z, const FpT& x, const uint32_t y) { mulSmall(z.v_, x.v_, y); @@ -625,12 +636,15 @@ class FpT : public fp::Serializable, if (mulSmallUnit(z, x, y)) return; op_.fp_mulUnit(z.v_, x.v_, y, op_.p); } - static inline void mulUnit2(FpT& z, const FpT& x, const Unit y) +#endif + static inline void mulUnit(FpT& z, const FpT& x, const Unit y) { -// if (mulSmallUnit(z, x, y)) return; - if (op_.mulSmallUnit2(op_.smallModP, z.v_, x.v_, y)) return; + if (mcl::fp::mulSmallUnit(z, x, y)) return; + if (op_.mulSmallUnit(op_.smallModP, z.v_, x.v_, y)) return; op_.fp_mulUnit(z.v_, x.v_, y, op_.p); } + // alias of mulUnit + static inline void mulSmall(FpT& z, const FpT& x, const uint32_t y) { mulUnit(z, x, y); } static inline void inv(FpT& y, const FpT& x) { assert(!x.isZero()); diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp index afb50192..c9a5c0f1 100644 --- a/include/mcl/gmp_util.hpp +++ b/include/mcl/gmp_util.hpp @@ -949,6 +949,7 @@ class SquareRoot { #endif }; +#if 0 /* x mod p for a small value x < (pMulTblN * p). */ @@ -1026,7 +1027,7 @@ struct SmallModp { } } }; - +#endif /* Barrett Reduction diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 8b7b2608..0358b7ed 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -185,7 +185,7 @@ struct Op { mcl::SquareRoot sq; CYBOZU_ALIGN(8) char im[sizeof(mcl::inv::InvModT)]; mcl::Modp modp; - mcl::SmallModp smallModp; +// mcl::SmallModp smallModp; mcl::bint::SmallModP smallModP; Unit half[maxUnitSize]; // (p + 1) / 2 Unit oneRep[maxUnitSize]; // 1(=inv R if Montgomery) @@ -239,7 +239,7 @@ struct Op { void3u fp_mul2; void2uOp fp_invOp; void2uIu fp_mulUnit; // fp_mulUnitPre - bool (*mulSmallUnit2)(const mcl::bint::SmallModP&, Unit *z, const Unit *x, Unit y); + bool (*mulSmallUnit)(const mcl::bint::SmallModP&, Unit *z, const Unit *x, Unit y); void3u fpDbl_mulPre; void2u fpDbl_sqrPre; @@ -330,7 +330,7 @@ struct Op { fp_mul2 = 0; fp_invOp = 0; fp_mulUnit = 0; - mulSmallUnit2 = 0; + mulSmallUnit = 0; fpDbl_mulPre = 0; fpDbl_sqrPre = 0; diff --git a/include/mcl/util.hpp b/include/mcl/util.hpp index eea1c86a..81183118 100644 --- a/include/mcl/util.hpp +++ b/include/mcl/util.hpp @@ -154,10 +154,13 @@ bool mulSmallUnit(T& z, const T& x, U y) case 6: { T t; T::add(t, x, x); T::add(t, t, x); T::add(z, t, t); break; } case 7: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, t); T::sub(z, t, x); break; } case 8: T::add(z, x, x); T::add(z, z, z); T::add(z, z, z); break; + // slower than SmallModP +#if 0 case 9: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, t); T::add(z, t, x); break; } case 10: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, x); T::add(z, t, t); break; } case 11: { T t; T::add(t, x, x); T::add(t, t, x); T::add(t, t, t); T::add(t, t, t); T::sub(z, t, x); break; } case 12: { T t; T::add(t, x, x); T::add(t, t, t); T::add(z, t, t); T::add(z, z, t); break; } +#endif default: return false; } diff --git a/src/fp.cpp b/src/fp.cpp index f66010ec..e430be47 100644 --- a/src/fp.cpp +++ b/src/fp.cpp @@ -300,7 +300,7 @@ void setOp(Op& op) op.fp_shr1 = shr1T; op.fp_neg = negT; op.fp_mulUnitPre = mulUnitPreT; - op.mulSmallUnit2 = bint::SmallModP::mulUnit; + op.mulSmallUnit = bint::SmallModP::mulUnit; op.fp_addPre = bint::get_add(N); op.fp_subPre = bint::get_sub(N); op.fpDbl_addPre = bint::get_add(N * 2); @@ -581,7 +581,7 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size if (!b) return false; } modp.init(mp); - smallModp.init(mp); +// smallModp.init(mp); smallModP.init(p, N); return fp::initForMont(*this, p, mode); } From 63addd0c1a905c733e2cde6f03673d9f69445f15 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 24 Jun 2024 13:43:40 +0900 Subject: [PATCH 09/12] mulSmallUnit support 9 --- include/mcl/util.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/mcl/util.hpp b/include/mcl/util.hpp index 81183118..36706928 100644 --- a/include/mcl/util.hpp +++ b/include/mcl/util.hpp @@ -140,6 +140,7 @@ class BitIterator { /* shortcut of multiplication by Unit + remark : support b times where y^2=x^3+a x + b. */ template bool mulSmallUnit(T& z, const T& x, U y) @@ -154,9 +155,10 @@ bool mulSmallUnit(T& z, const T& x, U y) case 6: { T t; T::add(t, x, x); T::add(t, t, x); T::add(z, t, t); break; } case 7: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, t); T::sub(z, t, x); break; } case 8: T::add(z, x, x); T::add(z, z, z); T::add(z, z, z); break; + // require FpDblT::mulPre for xi.a = 9 + case 9: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, t); T::add(z, t, x); break; } // slower than SmallModP #if 0 - case 9: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, t); T::add(z, t, x); break; } case 10: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, x); T::add(z, t, t); break; } case 11: { T t; T::add(t, x, x); T::add(t, t, x); T::add(t, t, t); T::add(t, t, t); T::sub(z, t, x); break; } case 12: { T t; T::add(t, x, x); T::add(t, t, t); T::add(z, t, t); T::add(z, z, t); break; } From 9853d26899114fe29c0d2a3958c95911a51c64b3 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 24 Jun 2024 13:45:45 +0900 Subject: [PATCH 10/12] add test of mulUnit --- Makefile | 1 + test/smallmodp_test.cpp | 42 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 test/smallmodp_test.cpp diff --git a/Makefile b/Makefile index dabe95a3..4cf29baa 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,7 @@ TEST_SRC+=ecdsa_test.cpp ecdsa_c_test.cpp TEST_SRC+=mul_test.cpp TEST_SRC+=bint_test.cpp TEST_SRC+=low_func_test.cpp +TEST_SRC+=smallmodp_test.cpp ifneq ($(MCL_USE_GMP),1) TEST_SRC+=static_init_test.cpp endif diff --git a/test/smallmodp_test.cpp b/test/smallmodp_test.cpp new file mode 100644 index 00000000..666302b5 --- /dev/null +++ b/test/smallmodp_test.cpp @@ -0,0 +1,42 @@ +#include +#include +#include +#include + +using namespace mcl; +using namespace mcl::bn; + +template +void test(const char *s) +{ + puts(s); + cybozu::XorShift rg; + F x, z1, z2; + for (size_t i = 0; i < 1000; i++) { + x.setByCSPRNG(rg); + uint32_t y = uint32_t(i); + z1 = x * y; + F::mulUnit(z2, x, y); + CYBOZU_TEST_EQUAL(z1, z2); + } +#ifdef NDEBUG + const int C = 100000; + for (uint32_t i = 1; i < 10; i++) { + printf("i=% 2d ", i); + CYBOZU_BENCH_C("mulUnit", C, F::mulUnit, x, x, i); + } + CYBOZU_BENCH_C("mulUnit [1-256]", C, F::mulUnit, x, x, (*x.getUnit() % 256) + 1); + CYBOZU_BENCH_C("mulUnit [1000-1255]", C, F::mulUnit, x, x, (*x.getUnit() % 256) + 1000); + CYBOZU_BENCH_C("mulUnit all", C, F::mulUnit, x, x, uint32_t(*x.getUnit())); + CYBOZU_BENCH_C("mul(F, u32)", C, F::mul, x, x, uint32_t(*x.getUnit())); + CYBOZU_BENCH_C("mul(F, F)", C, F::mul, x, x, x); + CYBOZU_TEST_ASSERT(x != 0); +#endif +} + +CYBOZU_TEST_AUTO(main) +{ + initPairing(BLS12_381); + test("Fr"); + test("Fp"); +} From 446d9a4481bbe112119770dac26637ad1a4097c2 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 24 Jun 2024 14:12:56 +0900 Subject: [PATCH 11/12] remove old code --- include/mcl/fp.hpp | 43 --------------------- include/mcl/gmp_util.hpp | 80 ---------------------------------------- include/mcl/op.hpp | 2 - 3 files changed, 125 deletions(-) diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index 0877bb7f..d1f9f9a0 100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -114,25 +114,6 @@ class FpT : public fp::Serializable, op_.fp_add(y, x, x, op_.p); } #endif -#if 0 - static inline void mul9A(Unit *y, const Unit *x) - { - mulUnit(y, x, 9); -// op_.fp_mul9(y, x, op_.p); - } -#endif -#if 0 - static inline void mulSmall(Unit *z, const Unit *x, const uint32_t y) - { - assert(y <= op_.smallModp.maxMulN); - Unit xy[maxSize + 1]; - op_.fp_mulUnitPre(xy, x, y); - int v = op_.smallModp.approxMul(xy); - const Unit *pv = op_.smallModp.getPmul(v); - op_.fp_subPre(z, xy, pv); - op_.fp_sub(z, z, op_.p, op_.p); - } -#endif public: typedef FpT BaseFp; // return pointer to array v_[] @@ -191,11 +172,6 @@ class FpT : public fp::Serializable, if (op_.fp_mul2A_ == 0) { op_.fp_mul2A_ = mul2A; } -#if 0 - if (op_.fp_mul9A_ == 0) { - op_.fp_mul9A_ = mul9A; - } -#endif #endif *pb = true; } @@ -614,29 +590,10 @@ class FpT : public fp::Serializable, } static void mul9(FpT& y, const FpT& x) { -#if 1 mulUnit(y, x, 9); -#else -#ifdef MCL_XBYAK_DIRECT_CALL - op_.fp_mul9A_(y.v_, x.v_); -#else - mul9A(y.v_, x.v_); -#endif -#endif } static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); } static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); } -#if 0 - static inline void mulSmall(FpT& z, const FpT& x, const uint32_t y) - { - mulSmall(z.v_, x.v_, y); - } - static inline void mulUnit(FpT& z, const FpT& x, const Unit y) - { - if (mulSmallUnit(z, x, y)) return; - op_.fp_mulUnit(z.v_, x.v_, y, op_.p); - } -#endif static inline void mulUnit(FpT& z, const FpT& x, const Unit y) { if (mcl::fp::mulSmallUnit(z, x, y)) return; diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp index c9a5c0f1..b0e731fc 100644 --- a/include/mcl/gmp_util.hpp +++ b/include/mcl/gmp_util.hpp @@ -949,86 +949,6 @@ class SquareRoot { #endif }; -#if 0 -/* - x mod p for a small value x < (pMulTblN * p). -*/ -struct SmallModp { - static const size_t unitBitSize = sizeof(Unit) * 8; - static const size_t maxTblSize = (MCL_MAX_BIT_SIZE + unitBitSize - 1) / unitBitSize + 1; - static const size_t maxMulN = 9; - static const size_t pMulTblN = maxMulN + 1; - uint32_t N_; - uint32_t shiftL_; - uint32_t shiftR_; - uint32_t maxIdx_; - // pMulTbl_[i] = (p * i) >> (pBitSize_ - 1) - Unit pMulTbl_[pMulTblN][maxTblSize]; - // idxTbl_[x] = (x << (pBitSize_ - 1)) / p - uint8_t idxTbl_[pMulTblN * 2]; - // return x >> (pBitSize_ - 1) - SmallModp() - : N_(0) - , shiftL_(0) - , shiftR_(0) - , maxIdx_(0) - , pMulTbl_() - , idxTbl_() - { - } - // return argmax { i : x > i * p } - uint32_t approxMul(const Unit *x) const - { - uint32_t top = getTop(x); - assert(top <= maxIdx_); - return idxTbl_[top]; - } - const Unit *getPmul(size_t v) const - { - assert(v < pMulTblN); - return pMulTbl_[v]; - } - uint32_t getTop(const Unit *x) const - { - if (shiftR_ == 0) return x[N_ - 1]; - return (x[N_ - 1] >> shiftR_) | (x[N_] << shiftL_); - } - uint32_t cvtInt(const mpz_class& x) const - { - assert(mcl::gmp::getUnitSize(x) <= 1); - if (x == 0) { - return 0; - } else { - return uint32_t(mcl::gmp::getUnit(x)[0]); - } - } - void init(const mpz_class& p) - { - size_t pBitSize = mcl::gmp::getBitSize(p); - N_ = uint32_t((pBitSize + unitBitSize - 1) / unitBitSize); - shiftR_ = (pBitSize - 1) % unitBitSize; - shiftL_ = unitBitSize - shiftR_; - mpz_class t = 0; - for (size_t i = 0; i < pMulTblN; i++) { - bool b; - mcl::gmp::getArray(&b, pMulTbl_[i], maxTblSize, t); - assert(b); - (void)b; - if (i == pMulTblN - 1) { - maxIdx_ = getTop(pMulTbl_[i]); - assert(maxIdx_ < CYBOZU_NUM_OF_ARRAY(idxTbl_)); - break; - } - t += p; - } - - for (uint32_t i = 0; i <= maxIdx_; i++) { - idxTbl_[i] = cvtInt((mpz_class(int(i)) << (pBitSize - 1)) / p); - } - } -}; -#endif - /* Barrett Reduction for non GMP version diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 0358b7ed..94daa635 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -211,7 +211,6 @@ struct Op { void3u fp_mulA_; void2u fp_sqrA_; void2u fp_mul2A_; - void2u fp_mul9A_; void3u fp2_addA_; void3u fp2_subA_; void2u fp2_negA_; @@ -302,7 +301,6 @@ struct Op { fp_mulA_ = 0; fp_sqrA_ = 0; fp_mul2A_ = 0; - fp_mul9A_ = 0; fp2_addA_ = 0; fp2_subA_ = 0; fp2_negA_ = 0; From b7470597da8c549489c0d46d41ac95861141f869 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 24 Jun 2024 14:13:20 +0900 Subject: [PATCH 12/12] v1.94 --- include/mcl/op.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 94daa635..e11151a3 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -29,7 +29,7 @@ namespace mcl { -static const int version = 0x193; /* 0xABC = A.BC */ +static const int version = 0x194; /* 0xABC = A.BC */ /* specifies available string format mode for X::setIoMode()