From 239afa7966842b10da77de79379f74e4611a412f Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Fri, 26 Apr 2024 15:34:38 +0900 Subject: [PATCH 01/28] add testMulVec to bls12_381 --- test/bls12_test.cpp | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp index ee707a82..ded566c4 100644 --- a/test/bls12_test.cpp +++ b/test/bls12_test.cpp @@ -359,6 +359,32 @@ void testSerialize(const G1& P, const G2& Q) #include "bench.hpp" +void testMulVec() +{ + puts("testMulVec"); + const size_t n = 8192; + cybozu::XorShift rg; + std::vector Pvec(n); + std::vector xVec(n); + hashAndMapToG1(Pvec[0], "abc", 3); + for (size_t i = 1; i < n; i++) { + G1::add(Pvec[i], Pvec[i-1], Pvec[0]); + } + for (size_t i = 0; i < n; i++) { + xVec[i].setByCSPRNG(rg); + } + G1 P; + G1 P8191; + P8191.setStr("1 c252fef934098904eca8e3fbd9cc8c78877e434d9ce01e424ef07302cec5652dc17d341b8abd4278255a75718cebd67 17455f24f76e7e7d1dd3231d8f144a40decc40d5b129734879b8aad4a209a2e6d83d8256221e46aaf8205e254355d9ad", 16); + G1 P8192; + P8192.setStr("1 f0d44ba84af56d1db97f46660bfd12401aae239a6650cdfc168158d1076d68c5149ac3a311b9c058ad4e61ad1b8063 b2240da1e42c5f469ccf818e58901aca2283d1bd29565f5efbfa14e48cdae199c7a7981b958bfec332f6e613cf36990", 16); + G1::mulVec(P, Pvec.data(), xVec.data(), n-1); + CYBOZU_TEST_EQUAL(P, P8191); + G1::mulVec(P, Pvec.data(), xVec.data(), n); + CYBOZU_TEST_EQUAL(P, P8192); +} + + CYBOZU_TEST_AUTO(naive) { for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(g_testSetTbl); i++) { @@ -375,6 +401,7 @@ CYBOZU_TEST_AUTO(naive) clk.put(); return; #endif + testMulVec(); testSerialize(P, Q); testParam(ts); testIo(P, Q); @@ -857,7 +884,6 @@ CYBOZU_TEST_AUTO(verifyG2) CYBOZU_TEST_ASSERT(n == 0); } - typedef std::vector FpVec; void f(FpVec& zv, const FpVec& xv, const FpVec& yv) From f10582144854799bf2587d4cea6cd654826c55c1 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Tue, 23 Apr 2024 14:48:42 +0900 Subject: [PATCH 
02/28] add mulEach --- include/mcl/ec.hpp | 19 ++++++++++++++++++- test/common_test.hpp | 11 +++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp index 88e9092b..09ef7ced 100644 --- a/include/mcl/ec.hpp +++ b/include/mcl/ec.hpp @@ -1316,6 +1316,7 @@ class EcT : public fp::Serializable > { static mpz_class order_; static bool (*mulVecGLV)(EcT& z, const EcT *xVec, const void *yVec, size_t n, bool constTime); static void (*mulVecOpti)(Unit *z, Unit *xVec, const Unit *yVec, size_t n); + static void (*mulEachOpti)(void *xVec, const void *yVec, size_t n); static bool (*isValidOrderFast)(const EcT& x); /* default constructor is undefined value */ EcT() {} @@ -1382,6 +1383,7 @@ class EcT : public fp::Serializable > { order_ = 0; mulVecGLV = 0; mulVecOpti = 0; + mulEachOpti = 0; isValidOrderFast = 0; mode_ = mode; } @@ -2080,7 +2082,7 @@ class EcT : public fp::Serializable > { return; } if (mulVecOpti && n >= 128) { - mulVecOpti((Unit*)&z, (Unit*)xVec, (const Unit*)yVec, n); + mulVecOpti((Unit*)&z, (Unit*)xVec, yVec[0].getUnit(), n); return; } if (mulVecGLV && mulVecGLV(z, xVec, yVec, n, false)) { @@ -2133,6 +2135,20 @@ class EcT : public fp::Serializable > { mulVec(z, xVec, yVec, n); #endif } + // xVec[i] *= yVec[i] + static void mulEach(EcT *xVec, const EcT::Fr *yVec, size_t n) + { + if (mulEachOpti && n >= 8) { + size_t n8 = n & ~size_t(7); + mulEachOpti(xVec, yVec, n8); + xVec += n8; + yVec += n8; + n -= n8; + } + for (size_t i = 0; i < n; i++) { + xVec[i] *= yVec[i]; + } + } #ifndef CYBOZU_DONT_USE_EXCEPTION static inline void init(const std::string& astr, const std::string& bstr, int mode = ec::Jacobi) { @@ -2192,6 +2208,7 @@ template bool (*EcT::mulVecGLV)(EcT& z, const EcT *x template void (*EcT::mulVecOpti)(Unit *z, Unit *xVec, const Unit *yVec, size_t n); template bool (*EcT::isValidOrderFast)(const EcT& x); template int EcT::mode_; +template void (*EcT::mulEachOpti)(void *xVec, const void *yVec, size_t 
n); // r = the order of Ec template diff --git a/test/common_test.hpp b/test/common_test.hpp index 99d04fb2..0b47589e 100644 --- a/test/common_test.hpp +++ b/test/common_test.hpp @@ -57,6 +57,17 @@ void testMulVec(const G& P) CYBOZU_BENCH_C("mulVecCopy", C, mulVecCopy, Q1, xVec.data(), yVec.data(), n, x0Vec.data()); #endif } + puts("mulEach"); + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(nTbl); i++) { + const size_t n = nTbl[i]; + xVec = x0Vec; + G::mulEach(xVec.data(), yVec.data(), n); + for (size_t j = 0; j < n; j++) { + G T; + G::mul(T, x0Vec[i], yVec[i]); + CYBOZU_TEST_EQUAL(xVec[i], T); + } + } } template From f6fbf30c0c9acec582789b72eb54c77834e3beab Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Tue, 23 Apr 2024 15:55:09 +0900 Subject: [PATCH 03/28] add mulEachAVX512 --- include/mcl/bn.hpp | 2 ++ include/mcl/ec.hpp | 10 +++++++--- sample/mt_test.cpp | 1 + src/msm_avx.cpp | 19 ++++++++++++++++++- test/common_test.hpp | 3 +++ 5 files changed, 31 insertions(+), 4 deletions(-) diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp index 8e8fb9ca..b6b3bb65 100644 --- a/include/mcl/bn.hpp +++ b/include/mcl/bn.hpp @@ -48,6 +48,7 @@ namespace msm { bool initMsm(const mcl::CurveParam& cp, const Param *param); void mulVecAVX512(Unit *_P, Unit *_x, const Unit *_y, size_t n); +void mulEachAVX512(Unit *_x, const Unit *_y, size_t n); } // mcl::msm #endif @@ -2314,6 +2315,7 @@ inline void init(bool *pb, const mcl::CurveParam& cp = mcl::BN254, fp::Mode mode if (sizeof(Unit) == 8 && sizeof(Fp) == sizeof(mcl::msm::FpA) && sizeof(Fr) == sizeof(mcl::msm::FrA)) { if (mcl::msm::initMsm(cp, ¶)) { G1::setMulVecOpti(mcl::msm::mulVecAVX512); + G1::setMulEachOpti(mcl::msm::mulEachAVX512); } } #endif diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp index 09ef7ced..4230295a 100644 --- a/include/mcl/ec.hpp +++ b/include/mcl/ec.hpp @@ -1316,7 +1316,7 @@ class EcT : public fp::Serializable > { static mpz_class order_; static bool (*mulVecGLV)(EcT& z, const EcT *xVec, const void 
*yVec, size_t n, bool constTime); static void (*mulVecOpti)(Unit *z, Unit *xVec, const Unit *yVec, size_t n); - static void (*mulEachOpti)(void *xVec, const void *yVec, size_t n); + static void (*mulEachOpti)(Unit *xVec, const Unit *yVec, size_t n); static bool (*isValidOrderFast)(const EcT& x); /* default constructor is undefined value */ EcT() {} @@ -1414,6 +1414,10 @@ class EcT : public fp::Serializable > { { mulVecOpti = f; } + static void setMulEachOpti(void f(Unit *_xVec, const Unit *_yVec, size_t yn)) + { + mulEachOpti = f; + } static inline void init(bool *pb, const char *astr, const char *bstr, int mode = ec::Jacobi) { Fp a, b; @@ -2140,7 +2144,7 @@ class EcT : public fp::Serializable > { { if (mulEachOpti && n >= 8) { size_t n8 = n & ~size_t(7); - mulEachOpti(xVec, yVec, n8); + mulEachOpti((Unit*)xVec, yVec[0].getUnit(), n8); xVec += n8; yVec += n8; n -= n8; @@ -2208,7 +2212,7 @@ template bool (*EcT::mulVecGLV)(EcT& z, const EcT *x template void (*EcT::mulVecOpti)(Unit *z, Unit *xVec, const Unit *yVec, size_t n); template bool (*EcT::isValidOrderFast)(const EcT& x); template int EcT::mode_; -template void (*EcT::mulEachOpti)(void *xVec, const void *yVec, size_t n); +template void (*EcT::mulEachOpti)(Unit *xVec, const Unit *yVec, size_t n); // r = the order of Ec template diff --git a/sample/mt_test.cpp b/sample/mt_test.cpp index 9072f31a..47af39fe 100644 --- a/sample/mt_test.cpp +++ b/sample/mt_test.cpp @@ -65,6 +65,7 @@ int main(int argc, char *argv[]) P2.clear(); CYBOZU_BENCH_C("G1 multi ", C, G1::mulVecMT, P2, Pvec.data(), xVec.data(), n, cpuN); if (P1 != P2) puts("G1::mulVecMT err"); + CYBOZU_BENCH_C("G1 mulEach", C, G1::mulEach, Pvec.data(), xVec.data(), n); G2 Q1, Q2; CYBOZU_BENCH_C("G2 single", C, G2::mulVec, Q1, Qvec.data(), xVec.data(), n); CYBOZU_BENCH_C("G2 multi ", C, G2::mulVecMT, Q2, Qvec.data(), xVec.data(), n, cpuN); diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 997660c2..b2498803 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp 
@@ -1173,7 +1173,6 @@ struct EcM { { const bool isProj = false; const bool mixed = true; -// mcl::ec::normalizeVec(_P, _P, 8); g_param.normalizeVecG1(_P, _P, 8); EcM P, Q; P.setG1(_P, isProj); @@ -1387,6 +1386,24 @@ void mulVecAVX512(Unit *_P, Unit *_x, const Unit *_y, size_t n) } } +void mulEachAVX512(Unit *_x, const Unit *_y, size_t n) +{ + assert(n % 8 == 0); + const bool isProj = false; + const bool mixed = true; + mcl::msm::G1A *x = (mcl::msm::G1A*)_x; + const mcl::msm::FrA *y = (const mcl::msm::FrA*)_y; + g_param.normalizeVecG1(x, x, n); + for (size_t i = 0; i < n; i += 8) { + EcM P; + Vec yv[4]; + cvtFr8toVec4(yv, y+i); + P.setG1(x+i, isProj); + EcM::mulGLV(P, P, yv); + P.getG1(x+i); + } +} + bool initMsm(const mcl::CurveParam& cp, const mcl::msm::Param *param) { if (cp != mcl::BLS12_381) return false; diff --git a/test/common_test.hpp b/test/common_test.hpp index 0b47589e..e1729eab 100644 --- a/test/common_test.hpp +++ b/test/common_test.hpp @@ -33,6 +33,9 @@ void testMulVec(const G& P) cybozu::XorShift rg; for (size_t i = 0; i < N; i++) { G::mul(x0Vec[i], P, i + 3); + if (i == 30) { + x0Vec[i].clear(); // x0Vec[i] contains zero value + } xVec[i] = x0Vec[i]; yVec[i].setByCSPRNG(rg); } From af9fc4b65b3f1c2655d81871ef1acf465934fd45 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Tue, 23 Apr 2024 15:56:51 +0900 Subject: [PATCH 04/28] [doc] performance about mulEach --- misc/internal.md | 4 ++++ readme.md | 1 + 2 files changed, 5 insertions(+) diff --git a/misc/internal.md b/misc/internal.md index b1b2fdd2..c83f684d 100644 --- a/misc/internal.md +++ b/misc/internal.md @@ -16,6 +16,10 @@ w/o IFMA|66.498|122.666|227.042|426.498 w IFMA|46.411|87.002|153.958|300.331 speed up rate|1.43|1.41|1.47|1.42 +G1 mulEach +- w/o IFMA : 42.166Mclk +- w IFMA : 16.643Mclk + # GLV method ## Split function for BLS12-381 diff --git a/readme.md b/readme.md index 13eb7c5b..0a8d8d72 100644 --- a/readme.md +++ b/readme.md @@ -10,6 +10,7 @@ mcl is a library for pairing-based 
cryptography, which supports the optimal Ate pairing over BN curves and BLS12-381 curves. # News +- mulEach with AVX-512 IFMA is 2.5 times faster than G1::mul on BLS12-381 - mulVec (multi scalar multiplication) with AVX-512 IFMA is 1.4 times faster on Xeon w9-3495X - a little performance improvement of G1::mulVec of BLS12-381 - improve performance of Fr::inv on M1 mac From 6d9851b3edb66f9e49f018711e2ed4ab3d3da5b0 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Tue, 23 Apr 2024 17:40:46 +0900 Subject: [PATCH 05/28] add C interface of G1::mulEach --- include/mcl/bn.h | 3 +++ include/mcl/impl/bn_c_impl.hpp | 4 ++++ src/msm_avx.cpp | 2 +- test/bn_c_test.hpp | 26 +++++++++++++++++++++++++- test/common_test.hpp | 4 ++-- 5 files changed, 35 insertions(+), 4 deletions(-) diff --git a/include/mcl/bn.h b/include/mcl/bn.h index 41ac9b06..8aa4b81d 100644 --- a/include/mcl/bn.h +++ b/include/mcl/bn.h @@ -485,6 +485,9 @@ MCLBN_DLL_API void mclBnG1_mulVec(mclBnG1 *z, mclBnG1 *x, const mclBnFr *y, mclS MCLBN_DLL_API void mclBnG2_mulVec(mclBnG2 *z, mclBnG2 *x, const mclBnFr *y, mclSize n); MCLBN_DLL_API void mclBnGT_powVec(mclBnGT *z, const mclBnGT *x, const mclBnFr *y, mclSize n); +// x[i] *= y[i] +MCLBN_DLL_API void mclBnG1_mulEach(mclBnG1 *x, const mclBnFr *y, mclSize n); + MCLBN_DLL_API void mclBn_pairing(mclBnGT *z, const mclBnG1 *x, const mclBnG2 *y); MCLBN_DLL_API void mclBn_finalExp(mclBnGT *y, const mclBnGT *x); MCLBN_DLL_API void mclBn_millerLoop(mclBnGT *z, const mclBnG1 *x, const mclBnG2 *y); diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp index 00f49c5f..77bef385 100644 --- a/include/mcl/impl/bn_c_impl.hpp +++ b/include/mcl/impl/bn_c_impl.hpp @@ -639,6 +639,10 @@ void mclBnGT_powVec(mclBnGT *z, const mclBnGT *x, const mclBnFr *y, mclSize n) { GT::powVec(*cast(z), cast(x), cast(y), n); } +void mclBnG1_mulEach(mclBnG1 *x, const mclBnFr *y, mclSize n) +{ + G1::mulEach(cast(x), cast(y), n); +} void mclBn_pairing(mclBnGT *z, const mclBnG1 
*x, const mclBnG2 *y) { diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index b2498803..f14b596e 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -1389,7 +1389,7 @@ void mulVecAVX512(Unit *_P, Unit *_x, const Unit *_y, size_t n) void mulEachAVX512(Unit *_x, const Unit *_y, size_t n) { assert(n % 8 == 0); - const bool isProj = false; + const bool isProj = true; const bool mixed = true; mcl::msm::G1A *x = (mcl::msm::G1A*)_x; const mcl::msm::FrA *y = (const mcl::msm::FrA*)_y; diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp index febe27fc..40bacb61 100644 --- a/test/bn_c_test.hpp +++ b/test/bn_c_test.hpp @@ -1078,13 +1078,21 @@ CYBOZU_TEST_AUTO(mulVec) for (size_t i = 0; i < N; i++) { char c = char('a' + i); mclBnG1_hashAndMapTo(&x1Vec[i], &c, 1); + if (i == 10) { + mclBnG1_clear(&x1Vec[i]); // x1Vec[i] contains zero + } mclBnG2_hashAndMapTo(&x2Vec[i], &c, 1); mclBn_pairing(&xtVec[i], &x1Vec[i], &x2Vec[i]); - mclBnFr_setByCSPRNG(&yVec[i]); +// mclBnFr_setByCSPRNG(&yVec[i]); + mclBnFr_setHashOf(&yVec[i], &c, 1); } + mclBnG1 x1Vec2[N]; + memcpy(x1Vec2, x1Vec, sizeof(x1Vec)); + mclBnG1_mulVec(&z1, x1Vec, yVec, N); mclBnG2_mulVec(&z2, x2Vec, yVec, N); mclBnGT_powVec(&zt, xtVec, yVec, N); + mclBnG1_mulEach(x1Vec2, yVec, N); mclBnG1_clear(&w1); mclBnG2_clear(&w2); @@ -1094,6 +1102,22 @@ CYBOZU_TEST_AUTO(mulVec) mclBnG2 t2; mclBnGT tt; mclBnG1_mul(&t1, &x1Vec[i], &yVec[i]); + CYBOZU_TEST_ASSERT(mclBnG1_isEqual(&t1, &x1Vec2[i])); +#if 0 + if (mclBnG1_isEqual(&t1, &x1Vec2[i]) == 0) { + char buf[1024]; + printf("i=%zd\n", i); + mclBnG1_getStr(buf, sizeof(buf), &x1Vec[i], 10); + printf("x1=%s\n", buf); + mclBnFr_getStr(buf, sizeof(buf), &yVec[i], 10); + printf("y=%s\n", buf); + mclBnG1_getStr(buf, sizeof(buf), &t1, 10); + printf("xy=%s\n", buf); + mclBnG1_getStr(buf, sizeof(buf), &x1Vec2[i], 10); + printf("ng=%s\n", buf); + exit(1); + } +#endif mclBnG2_mul(&t2, &x2Vec[i], &yVec[i]); mclBnGT_pow(&tt, &xtVec[i], &yVec[i]); mclBnG1_add(&w1, &w1, &t1); diff --git 
a/test/common_test.hpp b/test/common_test.hpp index e1729eab..e00cd726 100644 --- a/test/common_test.hpp +++ b/test/common_test.hpp @@ -67,8 +67,8 @@ void testMulVec(const G& P) G::mulEach(xVec.data(), yVec.data(), n); for (size_t j = 0; j < n; j++) { G T; - G::mul(T, x0Vec[i], yVec[i]); - CYBOZU_TEST_EQUAL(xVec[i], T); + G::mul(T, x0Vec[j], yVec[j]); + CYBOZU_TEST_EQUAL(xVec[j], T); } } } From 6683df41ccb7e961c20932af6795e01235b85961 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Fri, 26 Apr 2024 09:54:06 +0900 Subject: [PATCH 06/28] split function for BLS12-381 to ec::local --- include/mcl/bn.hpp | 31 +++++-------------------------- include/mcl/ec.hpp | 29 +++++++++++++++++++++++++++++ src/msm_avx.cpp | 31 +++++-------------------------- 3 files changed, 39 insertions(+), 52 deletions(-) diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp index b6b3bb65..80a5fbde 100644 --- a/include/mcl/bn.hpp +++ b/include/mcl/bn.hpp @@ -726,34 +726,13 @@ struct GLV1 : mcl::GLV1T { } static inline void optimizedSplitForBLS12_381(mpz_class u[2], const mpz_class& x) { - assert(sizeof(Unit) == 8); - /* - z = -0xd201000000010000 - L = z^2-1 = 0xac45a4010001a40200000000ffffffff - r = L^2+L+1 = 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001 - s=255 - v = 0xbe35f678f00fd56eb1fb72917b67f718 - */ - mpz_class& a = u[0]; - mpz_class& b = u[1]; - static const uint64_t Lv[] = { 0x00000000ffffffff, 0xac45a4010001a402 }; - static const uint64_t vv[] = { 0xb1fb72917b67f718, 0xbe35f678f00fd56e }; static const size_t n = 128 / mcl::UnitBitSize; - Unit t[n*3]; - // n = 128 bit - // t[n*3] = x[n*2] * vv[n] - mcl::bint::mulNM(t, gmp::getUnit(x), n*2, (const Unit*)vv, n); - // t[n] <- t[n*3] - mcl::bint::shrT(t, t+n*2-1, mcl::UnitBitSize-1); // >>255 + Unit xa[n*2], a[2], b[2]; + mcl::gmp::getArray(xa, n*2, x); + ec::local::optimizedSplitRawForBLS12_381(a, b, xa); bool dummy; - gmp::setArray(&dummy, b, t, n); - Unit t2[n*2]; - // t2[n*2] = t[n] * Lv[n] - // Do not overlap 
I/O buffers on pre-Broadwell CPUs. - mcl::bint::mulT(t2, t, (const Unit*)Lv); - // t[n] = x[n*2] - t2[n*2] - mcl::bint::subT(t, gmp::getUnit(x), t2); - gmp::setArray(&dummy, a, t, n); + gmp::setArray(&dummy, u[0], a, n); + gmp::setArray(&dummy, u[1], b, n); (void)dummy; } }; diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp index 4230295a..847802c0 100644 --- a/include/mcl/ec.hpp +++ b/include/mcl/ec.hpp @@ -242,6 +242,35 @@ void normalizeVecT(Eout& Q, Ein& P, size_t n, size_t N = 256) } } +inline void optimizedSplitRawForBLS12_381(Unit a[2], Unit b[2], const Unit x[4]) +{ + assert(sizeof(Unit) == 8); + /* + z = -0xd201000000010000 + L = z^2-1 = 0xac45a4010001a40200000000ffffffff + r = L^2+L+1 = 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001 + s=255 + v = 0xbe35f678f00fd56eb1fb72917b67f718 + */ + static const uint64_t Lv[] = { 0x00000000ffffffff, 0xac45a4010001a402 }; + static const uint64_t vv[] = { 0xb1fb72917b67f718, 0xbe35f678f00fd56e }; + static const size_t n = 128 / mcl::UnitBitSize; + Unit t[n*3]; + // n = 128 bit + // t[n*3] = x[n*2] * vv[n] + mcl::bint::mulNM(t, x, n*2, vv, n); + // b[n] = t[n*3]>>255 + mcl::bint::shrT(t, t+n*2-1, mcl::UnitBitSize-1); // >>255 + b[0] = t[0]; + b[1] = t[1]; + Unit t2[n*2]; + // t2[n*2] = t[n] * Lv[n] + // Do not overlap I/O buffers on pre-Broadwell CPUs. 
+ mcl::bint::mulT(t2, t, Lv); + // a[n] = x[n*2] - t2[n*2] + mcl::bint::subT(a, x, t2); +} + } // mcl::ec::local // [X:Y:Z] as Proj = (X/Z, Y/Z) as Affine = [XZ:YZ^2:Z] as Jacobi diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index f14b596e..ae03f2c9 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -471,27 +471,6 @@ inline Vec getUnitAt(const Vec *x, size_t xN, size_t bitPos) return vor(vpsrlq(x[q], r), vpsllq(x[q+1], bitSize - r)); } -inline void split(Unit a[2], Unit b[2], const Unit x[4]) -{ - /* - z = -0xd201000000010000 - L = z^2-1 = 0xac45a4010001a40200000000ffffffff - r = L^2+L+1 = 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001 - s=255 - v = 0xbe35f678f00fd56eb1fb72917b67f718 - */ - static const uint64_t Lv[] = { 0x00000000ffffffff, 0xac45a4010001a402 }; - static const uint64_t vv[] = { 0xb1fb72917b67f718, 0xbe35f678f00fd56e }; - static const size_t n = 128 / mcl::UnitBitSize; - Unit t[n*3]; - mcl::bint::mulNM(t, x, n*2, vv, n); - mcl::bint::shrT(t, t+n*2-1, mcl::UnitBitSize-1); // >>255 - b[0] = t[0]; - b[1] = t[1]; - mcl::bint::mulT(t, t, Lv); - mcl::bint::subT(a, x, t); -} - class Montgomery { Unit v_[N]; public: @@ -1120,7 +1099,7 @@ struct EcM { static void mulGLV(EcM& Q, const EcM& _P, const Vec y[4]) { EcM P = _P; - if (!isProj) mcl::ec::ProjToJacobi(P, _P); +// if (!isProj) mcl::ec::ProjToJacobi(P, _P); Vec a[2], b[2]; EcM tbl1[tblN], tbl2[tblN]; makeTable(tbl1, P); @@ -1134,7 +1113,7 @@ struct EcM { for (size_t i = 0; i < M; i++) { Unit buf[4] = { src[i+M*0], src[i+M*1], src[i+M*2], src[i+M*3] }; Unit aa[2], bb[2]; - split(aa, bb, buf); + mcl::ec::local::optimizedSplitRawForBLS12_381(aa, bb, buf); pa[i+M*0] = aa[0]; pa[i+M*1] = aa[1]; pb[i+M*0] = bb[0]; pb[i+M*1] = bb[1]; } @@ -1167,7 +1146,7 @@ struct EcM { mul(T, T, b, 2); add(Q, Q, T); #endif - if (!isProj) mcl::ec::JacobiToProj(Q, Q); +// if (!isProj) mcl::ec::JacobiToProj(Q, Q); } static void mulGLVbn(mcl::msm::G1A _Q[8], mcl::msm::G1A _P[8], const Vec y[4]) { @@ 
-1356,7 +1335,7 @@ void mulVecAVX512(Unit *_P, Unit *_x, const Unit *_y, size_t n) Unit ya[4]; fr->fromMont(ya, y[i*8+j].v); Unit a[2], b[2]; - split(a, b, ya); + mcl::ec::local::optimizedSplitRawForBLS12_381(a, b, ya); py[j+0] = a[0]; py[j+8] = a[1]; py[j+16] = b[0]; @@ -1393,7 +1372,7 @@ void mulEachAVX512(Unit *_x, const Unit *_y, size_t n) const bool mixed = true; mcl::msm::G1A *x = (mcl::msm::G1A*)_x; const mcl::msm::FrA *y = (const mcl::msm::FrA*)_y; - g_param.normalizeVecG1(x, x, n); + if (!isProj) g_param.normalizeVecG1(x, x, n); for (size_t i = 0; i < n; i += 8) { EcM P; Vec yv[4]; From b158d30eadcf89d6dc8c4ccd34ef81b2cdd17556 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Tue, 30 Apr 2024 08:23:28 +0900 Subject: [PATCH 07/28] [doc] fix the value r of BN_SNARK1 --- api.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api.md b/api.md index ac6e98cb..07875410 100644 --- a/api.md +++ b/api.md @@ -61,7 +61,7 @@ r = |G1| = |G2| = |GT| curveType | b| r and p | ------------|--|------------------| BN254 | 2|r = 0x2523648240000001ba344d8000000007ff9f800000000010a10000000000000d
p = 0x2523648240000001ba344d80000000086121000000000013a700000000000013 | -BN_SNARK1|3|r = 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47
p = 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47| +BN_SNARK1|3|r = 0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f0000001
p = 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47| BLS12-381 | 4|r = 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001
p = 0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab | BN381 | 2|r = 0x240026400f3d82b2e42de125b00158405b710818ac000007e0042f008e3e00000000001080046200000000000000000d
p = 0x240026400f3d82b2e42de125b00158405b710818ac00000840046200950400000000001380052e000000000000000013 | From 5e29685e2cc08925bb08053482b6ce221a06767a Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Thu, 2 May 2024 17:23:16 +0900 Subject: [PATCH 08/28] disable optimized version of split for 32-bit OS --- include/mcl/bn.hpp | 9 +++++++-- include/mcl/ec.hpp | 2 ++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp index 80a5fbde..480e434f 100644 --- a/include/mcl/bn.hpp +++ b/include/mcl/bn.hpp @@ -693,9 +693,12 @@ struct GLV1 : mcl::GLV1T { const mpz_class& r = Fr::getOp().mp; B[0][0] = z * z - 1; // L v0 = (B[0][0] << rBitSize) / r; - if (curveType == BLS12_381.curveType && MCL_SIZEOF_UNIT == 8) { +#if MCL_SIZEOF_UNIT == 8 + if (curveType == BLS12_381.curveType) { optimizedSplit = optimizedSplitForBLS12_381; - } else { + } else +#endif + { optimizedSplit = splitForBLS12; } } else { @@ -724,6 +727,7 @@ struct GLV1 : mcl::GLV1T { b = (x * v0) >> rBitSize; a = x - b * B[0][0]; } +#if MCL_SIZEOF_UNIT == 8 static inline void optimizedSplitForBLS12_381(mpz_class u[2], const mpz_class& x) { static const size_t n = 128 / mcl::UnitBitSize; @@ -735,6 +739,7 @@ struct GLV1 : mcl::GLV1T { gmp::setArray(&dummy, u[1], b, n); (void)dummy; } +#endif }; /* diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp index 847802c0..8b3b6bf0 100644 --- a/include/mcl/ec.hpp +++ b/include/mcl/ec.hpp @@ -242,6 +242,7 @@ void normalizeVecT(Eout& Q, Ein& P, size_t n, size_t N = 256) } } +#if MCL_SIZEOF_UNIT == 8 inline void optimizedSplitRawForBLS12_381(Unit a[2], Unit b[2], const Unit x[4]) { assert(sizeof(Unit) == 8); @@ -270,6 +271,7 @@ inline void optimizedSplitRawForBLS12_381(Unit a[2], Unit b[2], const Unit x[4]) // a[n] = x[n*2] - t2[n*2] mcl::bint::subT(a, x, t2); } +#endif } // mcl::ec::local From 5aba8ecc8d8dfd5153cff1e70efdb2babeb74712 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 6 May 2024 08:36:53 +0900 Subject: 
[PATCH 09/28] [doc] fix api comment --- api.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api.md b/api.md index 07875410..06222df0 100644 --- a/api.md +++ b/api.md @@ -253,7 +253,7 @@ C++ T x = ; ``` -### Set `buf[0..bufSize-1]` to `x` with masking according to the following way. +### Set `bufSize` bytes `buf` to `x` with masking according to the following way. ``` int mclBnFp_setLittleEndian(mclBnFp *x, const void *buf, mclSize bufSize); int mclBnFr_setLittleEndian(mclBnFr *x, const void *buf, mclSize bufSize); @@ -270,7 +270,7 @@ T::setArrayMask(const uint8_t *buf, size_t n); - always return 0 -### Set (`buf[0..bufSize-1]` mod `p` or `r`) to `x`. +### Set `bufSize` bytes `buf` of mod `p` or `r` to `x`. ``` int mclBnFp_setLittleEndianMod(mclBnFp *x, const void *buf, mclSize bufSize); int mclBnFr_setLittleEndianMod(mclBnFr *x, const void *buf, mclSize bufSize); @@ -281,7 +281,7 @@ C++ T::setLittleEndianMod(const uint8_t *buf, mclSize bufSize); ``` -- return 0 if bufSize <= (sizeof(*x) * 8 * 2) else -1 +- return 0 if bufSize <= (sizeof(T) * 2) else -1 ### Get little-endian byte sequence `buf` corresponding to `x` ``` From 05ab08009649de13721fe25b1135b0c02de96ca8 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Tue, 7 May 2024 17:52:15 +0900 Subject: [PATCH 10/28] refactoring msm --- src/msm_avx.cpp | 107 ++++++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 49 deletions(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index ae03f2c9..488a4767 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -6,12 +6,13 @@ http://opensource.org/licenses/BSD-3-Clause */ #include -#include #ifdef _WIN32 #include #else #include #endif + +#include #define XBYAK_NO_EXCEPTION #include "xbyak/xbyak_util.h" @@ -19,6 +20,9 @@ typedef mcl::Unit Unit; typedef __m512i Vec; typedef __mmask8 Vmask; +//namespace { + + static mcl::msm::Param g_param; const size_t S = sizeof(Unit)*8-1; // 63 @@ -27,12 +31,10 @@ const size_t N = 8; 
// = ceil(384/52) const size_t M = sizeof(Vec) / sizeof(Unit); const uint64_t g_mask = (Unit(1)< inline Vmask isZero(const E& P) @@ -964,7 +967,7 @@ struct EcM { dblJacobiNoCheck(z, x); } } - static void init(Montgomery& mont) + static void init(const Montgomery& mont) { const int b = 4; mpz_class b3 = mont.toMont(b * 3); @@ -1148,6 +1151,7 @@ struct EcM { #endif // if (!isProj) mcl::ec::JacobiToProj(Q, Q); } +#if 0 static void mulGLVbn(mcl::msm::G1A _Q[8], mcl::msm::G1A _P[8], const Vec y[4]) { const bool isProj = false; @@ -1158,6 +1162,7 @@ struct EcM { mulGLV(Q, P, y); Q.getG1(_Q); } +#endif void cset(const Vmask& c, const EcM& v) { x.cset(c, v.x); @@ -1206,6 +1211,7 @@ inline void cvtFr8toVec4(Vec yv[4], const mcl::msm::FrA y[8]) cvt4Ux8to8Ux4(yv, ya); } +#if 0 template inline void mulVecAVX512_naive(mcl::msm::G1A& P, const mcl::msm::G1A *x, const mcl::msm::FrA *y, size_t n) { @@ -1226,6 +1232,7 @@ inline void mulVecAVX512_naive(mcl::msm::G1A& P, const mcl::msm::G1A *x, const m if (!isProj) mcl::ec::JacobiToProj(R, R); reduceSum(P, R); } +#endif // xVec[n], yVec[n * maxBitSize/64] // assume xVec[] is normalized @@ -1311,6 +1318,7 @@ void mulVec_naive(mcl::msm::G1A& P, const mcl::msm::G1A *x, const mcl::msm::FrA } } #endif +//} // namespace namespace mcl { namespace msm { @@ -1390,30 +1398,31 @@ bool initMsm(const mcl::CurveParam& cp, const mcl::msm::Param *param) if (!cpu.has(Xbyak::util::Cpu::tAVX512_IFMA)) return false; g_param = *param; - Montgomery& mont = g_mont; const mpz_class& mp = g_param.fp->mp; - - mont.set(mp); - toArray<6, 64>(g_mpM2, mp-2); - expand(vmask, g_mask); - expandN(vpN, mp); - expand(vrp, mont.rp); + FpM::init(mp); + Montgomery& mont = FpM::g_mont; + Unit pM2[6]; // x^(-1) = x^(p-2) mod p + toArray<6, 64>(pM2, mp-2); + expand(g_vmask, g_mask); + expandN(g_vpN, mp); + expand(g_vrp, mont.rp); + Vec vpM2[6]; // NOT 52-bit but 64-bit for (int i = 0; i < 6; i++) { - expand(g_vmpM2[i], g_mpM2[i]); + expand(vpM2[i], pM2[i]); } expand(g_vmask4, 
getMask(4)); for (int i = 0; i < 8; i++) { ((Unit*)&g_offset)[i] = i; } expand(g_vi192, 192); - expandN(FpM::one_.v, g_mont.toMont(1)); + expandN(FpM::one_.v, mont.toMont(1)); expandN(FpM::rawOne_.v, mpz_class(1)); - expandN(FpM::mR2_.v, g_mont.mR2); + expandN(FpM::mR2_.v, mont.mR2); { mpz_class t(1); t <<= 32; - FpM::m64to52_.set(t); - FpM::pow(FpM::m52to64_, FpM::m64to52_, g_vmpM2, 6); + FpM::m64to52_.set(t); // 2^32 + FpM::pow(FpM::m52to64_, FpM::m64to52_, vpM2, 6); } FpM::rw_.setFp(g_param.rw); EcM::init(mont); From 973f66f3dcece7b10b62df142c87e14107d45106 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 8 May 2024 09:37:37 +0900 Subject: [PATCH 11/28] add test of msm --- Makefile | 7 +++++++ src/msm_avx.cpp | 26 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/Makefile b/Makefile index ad7cf292..e334dca7 100644 --- a/Makefile +++ b/Makefile @@ -443,6 +443,13 @@ bin/llvm_test64.exe: test/llvm_test.cpp src/base64.ll bin/llvm_test32.exe: test/llvm_test.cpp src/base32.ll $(CLANG) -o $@ -Ofast -DNDEBUG -Wall -Wextra -I ./include test/llvm_test.cpp src/base32.ll -m32 +$(OBJ_DIR)/$(MSM)_test.o: src/$(MSM).cpp + $(PRE)$(CXX) -c $< -o $@ $(CFLAGS) -mavx512f -mavx512ifma -std=c++11 $(CFLAGS_USER) -DMCL_MSM_TEST +MSM_TEST_OBJ=$(OBJ_DIR)/$(MSM)_test.o $(filter-out $(OBJ_DIR)/msm_avx.o,$(LIB_OBJ)) +$(EXE_DIR)/msm_test.exe: $(MSM_TEST_OBJ) + $(PRE)$(CXX) -o $@ $(LDFLAGS) $(MSM_TEST_OBJ) +-include $(OBJ_DIR)/msm_test.d + make_tbl: $(MAKE) ../bls/src/qcoeff-bn254.hpp diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 488a4767..8e472a33 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -1431,3 +1431,29 @@ bool initMsm(const mcl::CurveParam& cp, const mcl::msm::Param *param) } } // mcl::msm +#ifdef MCL_MSM_TEST +#include +#include +#include + +CYBOZU_TEST_AUTO(mulEach) +{ + using namespace mcl::bn; + initPairing(mcl::BLS12_381); + const size_t n = 8; + G1 P[n], Q[n], R[n]; + Fr x[n]; + cybozu::XorShift rg; + for (size_t i = 0; i < n; 
i++) { + char c = 'a' + i; + hashAndMapToG1(P[i], &c, 1); + x[i].setByCSPRNG(rg); + Q[i] = P[i]; + G1::mul(R[i], Q[i], x[i]); + } + G1::mulEach(P, x, n); + for (size_t i = 0; i < n; i++) { + CYBOZU_TEST_EQUAL(P[i], R[i]); + } +} +#endif From 1d13bbb781eafac6f9ed08505c7def12b1efed84 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 8 May 2024 10:39:23 +0900 Subject: [PATCH 12/28] remove unused code --- src/msm_avx.cpp | 93 ++----------------------------------------------- 1 file changed, 2 insertions(+), 91 deletions(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 8e472a33..890c4509 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -20,8 +20,7 @@ typedef mcl::Unit Unit; typedef __m512i Vec; typedef __mmask8 Vmask; -//namespace { - +namespace { static mcl::msm::Param g_param; @@ -999,44 +998,20 @@ struct EcM { } void setG1(const mcl::msm::G1A v[M], bool JacobiToProj = true) { -#if 1 setArray(v[0].v); FpM::mul(x, x, FpM::m64to52_); FpM::mul(y, y, FpM::m64to52_); FpM::mul(z, z, FpM::m64to52_); -#else - Unit a[6*3*M]; - const Unit *src = (const Unit *)v; - for (size_t i = 0; i < M*3; i++) { - mcl::bn::Fp::getOp().fromMont(a+i*6, src+i*6); - } - setArray(a); - x.toMont(x); - y.toMont(y); - z.toMont(z); -#endif if (JacobiToProj) mcl::ec::JacobiToProj(*this, *this); } void getG1(mcl::msm::G1A v[M], bool ProjToJacobi = true) const { EcM T = *this; if (ProjToJacobi) mcl::ec::ProjToJacobi(T, T); -#if 1 FpM::mul(T.x, T.x, FpM::m52to64_); FpM::mul(T.y, T.y, FpM::m52to64_); FpM::mul(T.z, T.z, FpM::m52to64_); T.getArray(v[0].v); -#else - T.x.fromMont(T.x); - T.y.fromMont(T.y); - T.z.fromMont(T.z); - Unit a[6*3*M]; - T.getArray(a); - Unit *dst = (Unit *)v; - for (size_t i = 0; i < M*3; i++) { - mcl::bn::Fp::getOp().toMont(dst+i*6, a+i*6); - } -#endif } void normalize() { @@ -1211,29 +1186,6 @@ inline void cvtFr8toVec4(Vec yv[4], const mcl::msm::FrA y[8]) cvt4Ux8to8Ux4(yv, ya); } -#if 0 -template -inline void mulVecAVX512_naive(mcl::msm::G1A& P, const 
mcl::msm::G1A *x, const mcl::msm::FrA *y, size_t n) -{ - assert(n % 8 == 0); - EcM R; - for (size_t i = 0; i < n; i += 8) { - Vec yv[4]; - cvtFr8toVec4(yv, y+i); - EcM T, X; - X.setG1(x+i, isProj); - if (i == 0) { - EcM::mulGLV(R, X, yv); - } else { - EcM::mulGLV(T, X, yv); - EcM::add(R, R, T); - } - } - if (!isProj) mcl::ec::JacobiToProj(R, R); - reduceSum(P, R); -} -#endif - // xVec[n], yVec[n * maxBitSize/64] // assume xVec[] is normalized inline void mulVecAVX512_inner(mcl::msm::G1A& P, const EcM *xVec, const Vec *yVec, size_t n, size_t maxBitSize) @@ -1277,48 +1229,7 @@ inline void mulVecAVX512_inner(mcl::msm::G1A& P, const EcM *xVec, const Vec *yVe Xbyak::AlignedFree(tbl); } -#if 0 -void mulVec_naive(mcl::msm::G1A& P, const mcl::msm::G1A *x, const mcl::msm::FrA *y, size_t n) -{ - size_t c = mcl::ec::argminForMulVec(n); - size_t tblN = (1 << c) - 0; - mcl::msm::G1A *tbl = (mcl::msm::G1A*)CYBOZU_ALLOCA(sizeof(mcl::msm::G1A) * tblN); - const size_t maxBitSize = 256; - const size_t winN = (maxBitSize + c-1) / c; - mcl::msm::G1A *win = (mcl::msm::G1A*)CYBOZU_ALLOCA(sizeof(mcl::msm::G1A) * winN); - - Unit *yVec = (Unit*)CYBOZU_ALLOCA(sizeof(mcl::msm::FrA) * n); - const mcl::msm::addG1Func addG1 = g_param.addG1; - const mcl::msm::dblG1Func dblG1 = g_param.dblG1; - const mcl::msm::clearG1Func clearG1 = g_param.clearG1; - for (size_t i = 0; i < n; i++) { - g_param.fr->fromMont(yVec+i*4, y[i].v); - } - for (size_t w = 0; w < winN; w++) { - for (size_t i = 0; i < tblN; i++) { - clearG1(tbl[i]); - } - for (size_t i = 0; i < n; i++) { - Unit v = mcl::fp::getUnitAt(yVec+i*4, 4, c * w) & (tblN-1); - addG1(tbl[v], tbl[v], x[i]); - } - mcl::msm::G1A sum = tbl[tblN-1]; - win[w] = sum; - for (size_t i = 1; i < tblN-1; i++) { - addG1(sum, sum, tbl[tblN - 1 - i]); - addG1(win[w], win[w], sum); - } - } - P = win[winN - 1]; - for (size_t w = 1; w < winN; w++) { - for (size_t i = 0; i < c; i++) { - dblG1(P, P); - } - addG1(P, P, win[winN - 1 - w]); - } -} -#endif -//} // namespace 
+} // namespace namespace mcl { namespace msm { From 0318b2a65fa247c711dc397f248a4271f7dd930b Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 8 May 2024 11:18:46 +0900 Subject: [PATCH 13/28] add test of normalizeVec --- src/msm_avx.cpp | 57 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 890c4509..5abbfd29 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -16,6 +16,10 @@ #define XBYAK_NO_EXCEPTION #include "xbyak/xbyak_util.h" +#if defined(__GNUC__) && !defined(__EMSCRIPTEN__) +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + typedef mcl::Unit Unit; typedef __m512i Vec; typedef __mmask8 Vmask; @@ -934,8 +938,6 @@ inline void dblJacobiNoCheck(E& R, const E& P) struct EcM { typedef FpM Fp; - static const int a_ = 0; - static const int b_ = 4; static const int specialB_ = mcl::ec::local::Plus4; static const int w = 4; static const int tblN = 1<(Q, P, y); - Q.getG1(_Q); - } -#endif void cset(const Vmask& c, const EcM& v) { x.cset(c, v.x); @@ -1347,18 +1337,47 @@ bool initMsm(const mcl::CurveParam& cp, const mcl::msm::Param *param) #include #include -CYBOZU_TEST_AUTO(mulEach) +using namespace mcl::bn; + +CYBOZU_TEST_AUTO(init) { - using namespace mcl::bn; initPairing(mcl::BLS12_381); +} + +void setParam(G1 *P, Fr *x, size_t n, cybozu::XorShift& rg) +{ + for (size_t i = 0; i < n; i++) { + uint32_t v = rg.get32(); + hashAndMapToG1(P[i], &v, sizeof(v)); + x[i].setByCSPRNG(rg); + } +} + +CYBOZU_TEST_AUTO(normalizeVec) +{ + const size_t n = 8; + G1 P[n], Q[n]; + Fr x[n]; + cybozu::XorShift rg; + setParam(P, x, n, rg); + for (size_t i = 0; i < n; i++) { + CYBOZU_TEST_ASSERT(!P[i].z.isOne()); + } + g_param.normalizeVecG1((mcl::msm::G1A*)Q, (const mcl::msm::G1A*)P, n); + for (size_t i = 0; i < n; i++) { + CYBOZU_TEST_ASSERT(Q[i].z.isOne()); + } + CYBOZU_TEST_EQUAL_ARRAY(P, Q, n); +} + +CYBOZU_TEST_AUTO(mulEach) +{ const size_t n = 8; G1 P[n], Q[n], 
R[n]; Fr x[n]; cybozu::XorShift rg; + setParam(P, x, n, rg); for (size_t i = 0; i < n; i++) { - char c = 'a' + i; - hashAndMapToG1(P[i], &c, 1); - x[i].setByCSPRNG(rg); Q[i] = P[i]; G1::mul(R[i], Q[i], x[i]); } From 9723acdce7d1b8f26bb30c53f561a5ee9ddceae8 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 8 May 2024 12:13:30 +0900 Subject: [PATCH 14/28] test of EcM::dbl, add --- src/msm_avx.cpp | 64 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 3 deletions(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 5abbfd29..e27f283e 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -1353,21 +1353,79 @@ void setParam(G1 *P, Fr *x, size_t n, cybozu::XorShift& rg) } } -CYBOZU_TEST_AUTO(normalizeVec) +CYBOZU_TEST_AUTO(op) { const size_t n = 8; - G1 P[n], Q[n]; + G1 P[n]; + G1 Q[n]; + G1 R[n]; + G1 T[n]; Fr x[n]; + mcl::msm::G1A *PA = (mcl::msm::G1A*)P; + mcl::msm::G1A *QA = (mcl::msm::G1A*)Q; + mcl::msm::G1A *TA = (mcl::msm::G1A*)T; + + EcM PM, QM, RM; cybozu::XorShift rg; setParam(P, x, n, rg); for (size_t i = 0; i < n; i++) { CYBOZU_TEST_ASSERT(!P[i].z.isOne()); } - g_param.normalizeVecG1((mcl::msm::G1A*)Q, (const mcl::msm::G1A*)P, n); + g_param.normalizeVecG1(QA, PA, n); for (size_t i = 0; i < n; i++) { CYBOZU_TEST_ASSERT(Q[i].z.isOne()); } CYBOZU_TEST_EQUAL_ARRAY(P, Q, n); + + // not normalize + for (size_t i = 0; i < n; i++) { + G1::dbl(P[i], P[i]); + } + + // test dbl + // R = 2P + for (size_t i = 0; i < n; i++) { + G1::dbl(R[i], P[i]); + } + // as Proj + PM.setG1(PA); + EcM::dbl(QM, PM); + QM.getG1(QA); + for (size_t i = 0; i < n; i++) { + CYBOZU_TEST_EQUAL(Q[i], R[i]); + } + + // as Jacobi + PM.setG1(PA, false); + EcM::dbl(QM, PM); + QM.getG1(QA, false); + for (size_t i = 0; i < n; i++) { + CYBOZU_TEST_EQUAL(Q[i], R[i]); + } + + // test add + // R = P + Q + for (size_t i = 0; i < n; i++) { + G1::add(R[i], P[i], Q[i]); + } + + // as Proj + PM.setG1(PA); + QM.setG1(QA); + EcM::add(RM, PM, QM); + RM.getG1(TA); + for 
(size_t i = 0; i < n; i++) { + CYBOZU_TEST_EQUAL(T[i], R[i]); + } + + // as Jacobi + PM.setG1(PA, false); + QM.setG1(QA, false); + EcM::add(RM, PM, QM); + RM.getG1(TA, false); + for (size_t i = 0; i < n; i++) { + CYBOZU_TEST_EQUAL(T[i], R[i]); + } } CYBOZU_TEST_AUTO(mulEach) From ccfd37dad61260dd8ebc48b31eef700759645780 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 8 May 2024 13:59:58 +0900 Subject: [PATCH 15/28] add test of mulEach --- src/msm_avx.cpp | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index e27f283e..5c31700f 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -1079,7 +1079,6 @@ struct EcM { static void mulGLV(EcM& Q, const EcM& _P, const Vec y[4]) { EcM P = _P; -// if (!isProj) mcl::ec::ProjToJacobi(P, _P); Vec a[2], b[2]; EcM tbl1[tblN], tbl2[tblN]; makeTable(tbl1, P); @@ -1126,7 +1125,6 @@ struct EcM { mul(T, T, b, 2); add(Q, Q, T); #endif -// if (!isProj) mcl::ec::JacobiToProj(Q, Q); } void cset(const Vmask& c, const EcM& v) { @@ -1344,11 +1342,12 @@ CYBOZU_TEST_AUTO(init) initPairing(mcl::BLS12_381); } -void setParam(G1 *P, Fr *x, size_t n, cybozu::XorShift& rg) +void setParam(G1 *P, Fr *x, size_t n, cybozu::XorShift& rg, bool containsZero = false) { for (size_t i = 0; i < n; i++) { uint32_t v = rg.get32(); hashAndMapToG1(P[i], &v, sizeof(v)); + if (containsZero && i == 3) P[i].clear(); x[i].setByCSPRNG(rg); } } @@ -1367,13 +1366,13 @@ CYBOZU_TEST_AUTO(op) EcM PM, QM, RM; cybozu::XorShift rg; - setParam(P, x, n, rg); + setParam(P, x, n, rg, true); for (size_t i = 0; i < n; i++) { CYBOZU_TEST_ASSERT(!P[i].z.isOne()); } g_param.normalizeVecG1(QA, PA, n); for (size_t i = 0; i < n; i++) { - CYBOZU_TEST_ASSERT(Q[i].z.isOne()); + CYBOZU_TEST_ASSERT(Q[i].z.isOne() || Q[i].z.isZero()); } CYBOZU_TEST_EQUAL_ARRAY(P, Q, n); @@ -1426,6 +1425,29 @@ CYBOZU_TEST_AUTO(op) for (size_t i = 0; i < n; i++) { CYBOZU_TEST_EQUAL(T[i], R[i]); } + + // as Jacobi (mixed) 
+ PM.setG1(PA, false); + for (size_t i = 0; i < n; i++) { + Q[i].normalize(); + } + QM.setG1(QA, false); + EcM::add(RM, PM, QM); + RM.getG1(TA, false); + for (size_t i = 0; i < n; i++) { + CYBOZU_TEST_EQUAL(T[i], R[i]); + } + + // mulEachAVX512 + for (size_t i = 0; i < n; i++) { + P[i] = R[i]; + Q[i] = R[i]; + G1::mul(R[i], P[i], x[i]); + } + mcl::msm::mulEachAVX512((Unit*)Q, (const Unit*)x, n); + for (size_t i = 0; i < n; i++) { + CYBOZU_TEST_EQUAL(Q[i], R[i]); + } } CYBOZU_TEST_AUTO(mulEach) From 6ba8d7bb6d36b2608556ab2adf5a1f4a89ac7302 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 8 May 2024 17:50:01 +0900 Subject: [PATCH 16/28] add cmp test --- src/msm_avx.cpp | 60 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 5c31700f..0000b4a2 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -48,6 +48,18 @@ inline Unit getMask(int w) return (Unit(1) << w) - 1; } +inline uint8_t cvtToInt(const Vmask& v) +{ + uint8_t r; + memcpy(&r, &v, sizeof(r)); + return r; +} + +inline void dump(const Vmask& v, const char *msg = nullptr) +{ + mcl::bint::dump(&v, sizeof(v), msg); +} + template inline void toArray(Unit x[N], mpz_class mx) { @@ -707,6 +719,10 @@ struct FpM { mpz_class r = getRaw(i); return g_mont.fromMont(r); } + void clear() + { + memset(this, 0, sizeof(*this)); + } bool operator==(const FpM& rhs) const { for (size_t i = 0; i < N; i++) { @@ -952,6 +968,10 @@ struct EcM { if (isProj) { mcl::ec::addCTProj(z, x, y); } else { +#if 0 + Vmask v = x.isEqualJacobiAll(y); + dump(v, "v"); +#endif if (mixed) { addJacobiMixedNoCheck(z, x, y); } else { @@ -1132,7 +1152,7 @@ struct EcM { y.cset(c, v.y); z.cset(c, v.z); } - Vmask isEqualAll(const EcM& rhs) const + Vmask isEqualJacobiAll(const EcM& rhs) const { FpM s1, s2, t1, t2; Vmask v1, v2; @@ -1141,11 +1161,13 @@ struct EcM { FpM::mul(t1, x, s2); FpM::mul(t2, rhs.x, s1); v1 = t2.isEqualAll(s1); +dump(v1, "v1"); 
FpM::mul(t1, y, s2); FpM::mul(t2, rhs.y, s1); FpM::mul(t1, t1, rhs.z); FpM::mul(t2, t2, z); v2 = t1.isEqualAll(t2); +dump(v2, "v2"); return mand(v1, v2); } }; @@ -1352,6 +1374,40 @@ void setParam(G1 *P, Fr *x, size_t n, cybozu::XorShift& rg, bool containsZero = } } +CYBOZU_TEST_AUTO(cmp) +{ + const size_t n = 8; + Vmask v; + FpM x, y; + x.clear(); + v = x.isEqualAll(x); + CYBOZU_TEST_EQUAL(cvtToInt(v), 0xff); + for (size_t i = 0; i < n; i++) { + y.clear(); + y.set(1, i); + v = x.isEqualAll(y); + CYBOZU_TEST_EQUAL(cvtToInt(v), 0xff ^ (1<(QM, PM); @@ -1437,6 +1494,7 @@ CYBOZU_TEST_AUTO(op) for (size_t i = 0; i < n; i++) { CYBOZU_TEST_EQUAL(T[i], R[i]); } +#endif // mulEachAVX512 for (size_t i = 0; i < n; i++) { From ae61978990c25e8df7032e5098bf5f4dad60564e Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 8 May 2024 18:02:52 +0900 Subject: [PATCH 17/28] add test of EcM::isEqualJacobiAll --- src/msm_avx.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 0000b4a2..29a8e782 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -954,6 +954,8 @@ inline void dblJacobiNoCheck(E& R, const E& P) struct EcM { typedef FpM Fp; + static const int a_ = 0; + static const int b_ = 4; static const int specialB_ = mcl::ec::local::Plus4; static const int w = 4; static const int tblN = 1<(QM, PM); @@ -1510,6 +1511,7 @@ CYBOZU_TEST_AUTO(op) CYBOZU_TEST_AUTO(mulEach) { + return; const size_t n = 8; G1 P[n], Q[n], R[n]; Fr x[n]; From 820bf56607a1f829461327f06fefb94c014fc994 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Thu, 9 May 2024 08:15:18 +0900 Subject: [PATCH 18/28] tweak --- src/msm_avx.cpp | 56 +++++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 29a8e782..74d629eb 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -1419,45 +1419,41 @@ CYBOZU_TEST_AUTO(op) Fr x[n]; mcl::msm::G1A 
*PA = (mcl::msm::G1A*)P; mcl::msm::G1A *QA = (mcl::msm::G1A*)Q; + mcl::msm::G1A *RA = (mcl::msm::G1A*)R; mcl::msm::G1A *TA = (mcl::msm::G1A*)T; - EcM PM, QM, RM; + EcM PM, QM, TM; cybozu::XorShift rg; - setParam(P, x, n, rg, true); + setParam(P, x, n, rg, false); + setParam(Q, x, n, rg, true); // contains zero for (size_t i = 0; i < n; i++) { CYBOZU_TEST_ASSERT(!P[i].z.isOne()); } - g_param.normalizeVecG1(QA, PA, n); + g_param.normalizeVecG1(RA, PA, n); for (size_t i = 0; i < n; i++) { - CYBOZU_TEST_ASSERT(Q[i].z.isOne() || Q[i].z.isZero()); - } - CYBOZU_TEST_EQUAL_ARRAY(P, Q, n); - - // not normalize - for (size_t i = 0; i < n; i++) { - G1::dbl(P[i], P[i]); + CYBOZU_TEST_ASSERT(R[i].z.isOne() || R[i].z.isZero()); } + CYBOZU_TEST_EQUAL_ARRAY(P, R, n); // test dbl // R = 2P for (size_t i = 0; i < n; i++) { G1::dbl(R[i], P[i]); } -#if 1 // as Proj PM.setG1(PA); - EcM::dbl(QM, PM); - QM.getG1(QA); + EcM::dbl(TM, PM); + TM.getG1(TA); for (size_t i = 0; i < n; i++) { - CYBOZU_TEST_EQUAL(Q[i], R[i]); + CYBOZU_TEST_EQUAL(R[i], T[i]); } // as Jacobi PM.setG1(PA, false); - EcM::dbl(QM, PM); - QM.getG1(QA, false); + EcM::dbl(TM, PM); + TM.getG1(TA, false); for (size_t i = 0; i < n; i++) { - CYBOZU_TEST_EQUAL(Q[i], R[i]); + CYBOZU_TEST_EQUAL(R[i], T[i]); } // test add @@ -1469,43 +1465,39 @@ CYBOZU_TEST_AUTO(op) // as Proj PM.setG1(PA); QM.setG1(QA); - EcM::add(RM, PM, QM); - RM.getG1(TA); + EcM::add(TM, PM, QM); + TM.getG1(TA); for (size_t i = 0; i < n; i++) { - CYBOZU_TEST_EQUAL(T[i], R[i]); + CYBOZU_TEST_EQUAL(R[i], T[i]); } // as Jacobi PM.setG1(PA, false); QM.setG1(QA, false); - EcM::add(RM, PM, QM); - RM.getG1(TA, false); + EcM::add(TM, PM, QM); + TM.getG1(TA, false); for (size_t i = 0; i < n; i++) { - CYBOZU_TEST_EQUAL(T[i], R[i]); + CYBOZU_TEST_EQUAL(R[i], T[i]); } // as Jacobi (mixed) - PM.setG1(PA, false); for (size_t i = 0; i < n; i++) { Q[i].normalize(); } QM.setG1(QA, false); - EcM::add(RM, PM, QM); - RM.getG1(TA, false); + EcM::add(TM, PM, QM); + TM.getG1(TA, 
false); for (size_t i = 0; i < n; i++) { - CYBOZU_TEST_EQUAL(T[i], R[i]); + CYBOZU_TEST_EQUAL(R[i], T[i]); } -#endif - // mulEachAVX512 for (size_t i = 0; i < n; i++) { - P[i] = R[i]; - Q[i] = R[i]; + Q[i] = P[i]; G1::mul(R[i], P[i], x[i]); } mcl::msm::mulEachAVX512((Unit*)Q, (const Unit*)x, n); for (size_t i = 0; i < n; i++) { - CYBOZU_TEST_EQUAL(Q[i], R[i]); + CYBOZU_TEST_EQUAL(R[i], Q[i]); } } From c8c06fd951a7f7f8b52f54b0630254d0c81bad60 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Thu, 9 May 2024 17:48:54 +0900 Subject: [PATCH 19/28] [doc] update comment of conversion --- include/mcl/ec.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp index 8b3b6bf0..6f2458d7 100644 --- a/include/mcl/ec.hpp +++ b/include/mcl/ec.hpp @@ -276,7 +276,7 @@ inline void optimizedSplitRawForBLS12_381(Unit a[2], Unit b[2], const Unit x[4]) } // mcl::ec::local // [X:Y:Z] as Proj = (X/Z, Y/Z) as Affine = [XZ:YZ^2:Z] as Jacobi -// Remark. convert P = [1:0:0] to Q = [0:0:0] +// Remark. convert P = [*:*:0] to Q = [0:0:0] template void ProjToJacobi(E& Q, const E& P) { @@ -288,7 +288,7 @@ void ProjToJacobi(E& Q, const E& P) } // [X:Y:Z] as Jacobi = (X/Z^2, Y/Z^3) as Affine = [XZ:Y:Z^3] as Proj -// Remark. convert P = [1:1:0] to Q = [0:1:0] +// Remark. 
convert P = [*:1:0] to Q = [0:1:0] template void JacobiToProj(E& Q, const E& P) { From bd36ff61f209fb5b3b1ce06484064d1ce259af5c Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Thu, 9 May 2024 17:56:17 +0900 Subject: [PATCH 20/28] refactoring msm --- include/mcl/ec.hpp | 8 +-- src/msm_avx.cpp | 132 ++++++++++++++++++++++++++++++++------------- 2 files changed, 101 insertions(+), 39 deletions(-) diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp index 6f2458d7..0bc67560 100644 --- a/include/mcl/ec.hpp +++ b/include/mcl/ec.hpp @@ -1501,11 +1501,13 @@ class EcT : public fp::Serializable > { { if (mode_ == ec::Jacobi) { x = 1; - } else { + y = 1; + z.clear(); + } else { // ec::Proj x.clear(); + y = 1; + z.clear(); } - y = 1; - z.clear(); } static inline void clear(EcT& P) { diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 74d629eb..6f3f0e13 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -739,6 +739,14 @@ struct FpM { } return vcmpeq(t, vzero()); } + Vmask isZero() const + { + Vec t = v[0]; + for (size_t i = 1; i < M; i++) { + t = vor(t, v[i]); + } + return vcmpeq(t, vzero()); + } static void pow(FpM& z, const FpM& x, const Vec *y, size_t yn) { const int w = 4; @@ -802,6 +810,15 @@ struct FpM { v[i] = vselect(c, x.v[i], v[i]); } } + // return c ? 
a : b; + static FpM select(const Vmask& c, const FpM& a, const FpM& b) + { + FpM d; + for (size_t i = 0; i < N; i++) { + d.v[i] = vselect(c, a.v[i], b.v[i]); + } + return d; + } static void init(const mpz_class& mp) { g_mont.init(mp); @@ -816,16 +833,6 @@ FpM FpM::m64to52_; FpM FpM::m52to64_; Montgomery FpM::g_mont; -template -inline Vmask isZero(const E& P) -{ - Vec v = P.z.v[0]; - for (size_t i = 1; i < N; i++) { - v = vor(v, P.z.v[i]); - } - return vcmpeq(v, vzero()); -} - template inline void normalizeJacobiVec(E P[n]) { @@ -863,8 +870,6 @@ template inline void addJacobiMixedNoCheck(E& R, const E& P, const E& Q) { typedef typename E::Fp F; - Vmask c = isZero(Q); - E saveP = P; F r, U1, S1, H, H3; F::sqr(r, P.z); U1 = P.x; @@ -886,7 +891,6 @@ inline void addJacobiMixedNoCheck(E& R, const E& P, const E& Q) F::mul(U1, U1, r); F::mul(H3, H3, S1); F::sub(R.y, U1, H3); - R.cset(c, saveP); } // 12M+4S+7A @@ -895,8 +899,6 @@ template inline void addJacobiNoCheck(E& R, const E& P, const E& Q) { typedef typename E::Fp F; - Vmask c = isZero(Q); - E saveP = P; F r, U1, S1, H, H3; F::sqr(r, P.z); F::sqr(S1, Q.z); @@ -921,7 +923,6 @@ inline void addJacobiNoCheck(E& R, const E& P, const E& Q) F::mul(U1, U1, r); F::mul(H3, H3, S1); F::sub(R.y, U1, H3); - R.cset(c, saveP); } // assume a = 0 @@ -970,15 +971,14 @@ struct EcM { if (isProj) { mcl::ec::addCTProj(z, x, y); } else { -#if 0 - Vmask v = x.isEqualJacobiAll(y); - dump(v, "v"); -#endif + EcM t; if (mixed) { - addJacobiMixedNoCheck(z, x, y); + addJacobiMixedNoCheck(t, x, y); } else { - addJacobiNoCheck(z, x, y); + addJacobiNoCheck(t, x, y); } + t = select(x.isZero(), y, t); + z = select(y.isZero(), x, t); } } template @@ -995,13 +995,21 @@ struct EcM { const int b = 4; mpz_class b3 = mont.toMont(b * 3); expandN(b3_.v, b3); - zeroJacobi_.x.set(1); - zeroJacobi_.y.set(1); + zeroJacobi_.x.set(0); + zeroJacobi_.y.set(0); zeroJacobi_.z.set(0); zeroProj_.x.set(0); zeroProj_.y.set(1); zeroProj_.z.set(0); } + static EcM 
select(const Vmask& c, const EcM& a, const EcM& b) + { + EcM d; + d.x = FpM::select(c, a.x, b.x); + d.y = FpM::select(c, a.y, b.y); + d.z = FpM::select(c, a.z, b.z); + return d; + } template static const EcM& zero() { @@ -1026,7 +1034,10 @@ struct EcM { FpM::mul(x, x, FpM::m64to52_); FpM::mul(y, y, FpM::m64to52_); FpM::mul(z, z, FpM::m64to52_); - if (JacobiToProj) mcl::ec::JacobiToProj(*this, *this); + if (JacobiToProj) { + mcl::ec::JacobiToProj(*this, *this); + y = FpM::select(z.isZero(), FpM::one_, y); + } } void getG1(mcl::msm::G1A v[M], bool ProjToJacobi = true) const { @@ -1154,6 +1165,10 @@ struct EcM { y.cset(c, v.y); z.cset(c, v.z); } + Vmask isZero() const + { + return z.isZero(); + } Vmask isEqualJacobiAll(const EcM& rhs) const { FpM s1, s2, t1, t2; @@ -1170,6 +1185,9 @@ struct EcM { v2 = t1.isEqualAll(t2); return mand(v1, v2); } +#ifdef MCL_MSM_TEST + void dump(bool isProj, size_t n, const char *msg = nullptr) const; +#endif }; FpM EcM::b3_; @@ -1298,7 +1316,7 @@ void mulEachAVX512(Unit *_x, const Unit *_y, size_t n) { assert(n % 8 == 0); const bool isProj = true; - const bool mixed = true; + const bool mixed = false; mcl::msm::G1A *x = (mcl::msm::G1A*)_x; const mcl::msm::FrA *y = (const mcl::msm::FrA*)_y; if (!isProj) g_param.normalizeVecG1(x, x, n); @@ -1308,12 +1326,17 @@ void mulEachAVX512(Unit *_x, const Unit *_y, size_t n) cvtFr8toVec4(yv, y+i); P.setG1(x+i, isProj); EcM::mulGLV(P, P, yv); - P.getG1(x+i); + P.getG1(x+i, isProj); } } bool initMsm(const mcl::CurveParam& cp, const mcl::msm::Param *param) { + assert(EcM::a_ == 0); + assert(EcM::b_ == 4); + (void)EcM::a_; // disable unused warning + (void)EcM::b_; + if (cp != mcl::BLS12_381) return false; Xbyak::util::Cpu cpu; if (!cpu.has(Xbyak::util::Cpu::tAVX512_IFMA)) return false; @@ -1356,20 +1379,30 @@ bool initMsm(const mcl::CurveParam& cp, const mcl::msm::Param *param) #include #include #include +#include using namespace mcl::bn; +void EcM::dump(bool isProj, size_t n, const char *msg) const +{ 
+ G1 T[8]; + getG1((mcl::msm::G1A*)T, isProj); + if (msg) printf("%s\n", msg); + for (size_t i = 0; i < n; i++) { + printf(" [%zd]=%s\n", i, T[i].getStr(16|mcl::IoEcProj).c_str()); + } +} + CYBOZU_TEST_AUTO(init) { initPairing(mcl::BLS12_381); } -void setParam(G1 *P, Fr *x, size_t n, cybozu::XorShift& rg, bool containsZero = false) +void setParam(G1 *P, Fr *x, size_t n, cybozu::XorShift& rg) { for (size_t i = 0; i < n; i++) { uint32_t v = rg.get32(); hashAndMapToG1(P[i], &v, sizeof(v)); - if (containsZero && i == 3) P[i].clear(); x[i].setByCSPRNG(rg); } } @@ -1424,8 +1457,10 @@ CYBOZU_TEST_AUTO(op) EcM PM, QM, TM; cybozu::XorShift rg; - setParam(P, x, n, rg, false); - setParam(Q, x, n, rg, true); // contains zero + setParam(P, x, n, rg); + setParam(Q, x, n, rg); + P[3].clear(); + Q[4].clear(); for (size_t i = 0; i < n; i++) { CYBOZU_TEST_ASSERT(!P[i].z.isOne()); } @@ -1490,6 +1525,7 @@ CYBOZU_TEST_AUTO(op) for (size_t i = 0; i < n; i++) { CYBOZU_TEST_EQUAL(R[i], T[i]); } +#if 1 // mulEachAVX512 for (size_t i = 0; i < n; i++) { Q[i] = P[i]; @@ -1499,23 +1535,47 @@ CYBOZU_TEST_AUTO(op) for (size_t i = 0; i < n; i++) { CYBOZU_TEST_EQUAL(R[i], Q[i]); } +#endif } -CYBOZU_TEST_AUTO(mulEach) +CYBOZU_TEST_AUTO(mulEach_special) { - return; const size_t n = 8; G1 P[n], Q[n], R[n]; Fr x[n]; + for (size_t i = 0; i < n; i++) P[i].clear(); + P[0].setStr("1 13de196893df2bb5b57882ff1eec37d98966aa71b828fd25125d04ed2c75ddc55d5bc68bd797bd555f9a827387ee6b28 5d59257a0fccd5215cdeb0928296a7a4d684823db76aef279120d2d71c4b54604ec885eb554f99780231ade171979a3", 16); + x[0].setStr("5b4b92c347ffcd8543904dd1b22a60d94b4a9c243046456b8befd41507bec5d", 16); + for (size_t i = 0; i < n; i++) Q[i] = P[i]; + G1::mul(R[0], P[0], x[0]); + G1::mulEach(Q, x, 8); + CYBOZU_TEST_EQUAL(R[0], Q[0]); +} + +CYBOZU_TEST_AUTO(mulEach) +{ + const size_t n = 64; + G1 P[n], Q[n], R[n]; + Fr x[n]; cybozu::XorShift rg; setParam(P, x, n, rg); + P[n/2].clear(); for (size_t i = 0; i < n; i++) { Q[i] = P[i]; - G1::mul(R[i], 
Q[i], x[i]); + G1::mul(R[i], P[i], x[i]); } - G1::mulEach(P, x, n); + G1::mulEach(Q, x, n); for (size_t i = 0; i < n; i++) { - CYBOZU_TEST_EQUAL(P[i], R[i]); + CYBOZU_TEST_EQUAL(R[i], Q[i]); + if (R[i] != Q[i]) { + printf("P[%zd]=%s\n", i, P[i].getStr(16).c_str()); + printf("x[%zd]=%s\n", i, x[i].getStr(16).c_str()); + printf("R[%zd]=%s\n", i, R[i].getStr(16|mcl::IoEcProj).c_str()); + printf("Q[%zd]=%s\n", i, Q[i].getStr(16|mcl::IoEcProj).c_str()); + } } +#ifdef NDEBUG + CYBOZU_BENCH_C("mulEach", 100, G1::mulEach, Q, x, n); +#endif } #endif From 9084010d207ec29a59357dea2ce8ee0dad79014d Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Tue, 14 May 2024 12:05:36 +0900 Subject: [PATCH 21/28] fix normalizeJacobiVec for zero --- include/mcl/ec.hpp | 4 +-- src/msm_avx.cpp | 77 +++++++++++++++++++++++++++++++++------------- 2 files changed, 57 insertions(+), 24 deletions(-) diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp index 0bc67560..f853919e 100644 --- a/include/mcl/ec.hpp +++ b/include/mcl/ec.hpp @@ -1500,8 +1500,8 @@ class EcT : public fp::Serializable > { void clear() { if (mode_ == ec::Jacobi) { - x = 1; - y = 1; + x = 0; + y = 0; z.clear(); } else { // ec::Proj x.clear(); diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 6f3f0e13..ad08b9f7 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -823,6 +823,9 @@ struct FpM { { g_mont.init(mp); } +#ifdef MCL_MSM_TEST + void dump(size_t pos, const char *msg = nullptr) const; +#endif }; FpM FpM::one_; @@ -839,27 +842,28 @@ inline void normalizeJacobiVec(E P[n]) assert(n >= 2); typedef typename E::Fp F; F tbl[n]; - tbl[0] = P[0].z; + tbl[0] = F::select(P[0].z.isZero(), F::one_, P[0].z); for (size_t i = 1; i < n; i++) { - F::mul(tbl[i], tbl[i-1], P[i].z); + F t = F::select(P[i].z.isZero(), F::one_, P[i].z); + F::mul(tbl[i], tbl[i-1], t); } F r; F::inv(r, tbl[n-1]); for (size_t i = 0; i < n; i++) { size_t pos = n-1-i; - F t = P[pos].z; + F& z = P[pos].z; F rz, rz2; - if (pos > 0) { - F::mul(rz, r, tbl[pos-1]); 
- F::mul(r, r, t); - } else { + if (pos == 0) { rz = r; + } else { + F::mul(rz, r, tbl[pos-1]); + F::mul(r, r, F::select(z.isZero(), F::one_, z)); } F::sqr(rz2, rz); F::mul(P[pos].x, P[pos].x, rz2); // xz^-2 F::mul(rz2, rz2, rz); F::mul(P[pos].y, P[pos].y, rz2); // yz^-3 - P[pos].z = F::one_; + z = F::select(z.isZero(), z, F::one_); } } @@ -1059,7 +1063,7 @@ struct EcM { template static void makeTable(EcM *tbl, const EcM& P) { - tbl[0].clear(); + tbl[0].clear(); tbl[1] = P; dbl(tbl[2], P); for (size_t i = 3; i < tblN; i++) { @@ -1109,13 +1113,13 @@ struct EcM { Q.z = P.z; } template - static void mulGLV(EcM& Q, const EcM& _P, const Vec y[4]) + static void mulGLV(EcM& Q, const EcM& P, const Vec y[4]) { - EcM P = _P; + // QQQ (n=1024) isProj=T : 36.8, isProj=F&&mixed=F : 36.0, isProj=F&&mixed=T : 34.6 Vec a[2], b[2]; EcM tbl1[tblN], tbl2[tblN]; makeTable(tbl1, P); - if (!isProj) normalizeJacobiVec(tbl1+1); + if (!isProj && mixed) normalizeJacobiVec(tbl1+1); for (size_t i = 0; i < tblN; i++) { mulLambda(tbl2[i], tbl1[i]); } @@ -1186,7 +1190,7 @@ struct EcM { return mand(v1, v2); } #ifdef MCL_MSM_TEST - void dump(bool isProj, size_t n, const char *msg = nullptr) const; + void dump(bool isProj, size_t pos, const char *msg = nullptr) const; #endif }; @@ -1315,11 +1319,11 @@ void mulVecAVX512(Unit *_P, Unit *_x, const Unit *_y, size_t n) void mulEachAVX512(Unit *_x, const Unit *_y, size_t n) { assert(n % 8 == 0); - const bool isProj = true; - const bool mixed = false; + const bool isProj = false; + const bool mixed = true; mcl::msm::G1A *x = (mcl::msm::G1A*)_x; const mcl::msm::FrA *y = (const mcl::msm::FrA*)_y; - if (!isProj) g_param.normalizeVecG1(x, x, n); + if (!isProj && mixed) g_param.normalizeVecG1(x, x, n); for (size_t i = 0; i < n; i += 8) { EcM P; Vec yv[4]; @@ -1383,14 +1387,21 @@ bool initMsm(const mcl::CurveParam& cp, const mcl::msm::Param *param) using namespace mcl::bn; -void EcM::dump(bool isProj, size_t n, const char *msg) const +void FpM::dump(size_t pos, 
const char *msg) const +{ + Fp T[8]; + getFp((mcl::msm::FpA*)T); + if (msg) printf("%s\n", msg); + printf(" [%zd]=%s\n", pos, T[pos].getStr(16).c_str()); +} + +void EcM::dump(bool isProj, size_t pos, const char *msg) const { G1 T[8]; getG1((mcl::msm::G1A*)T, isProj); if (msg) printf("%s\n", msg); - for (size_t i = 0; i < n; i++) { - printf(" [%zd]=%s\n", i, T[i].getStr(16|mcl::IoEcProj).c_str()); - } + printf(" [%zd]=%s\n", pos, T[pos].getStr(16|mcl::IoEcProj).c_str()); +// printf(" [%zd]=%s\n", pos, T[pos].getStr(16|mcl::IoEcAffine).c_str()); } CYBOZU_TEST_AUTO(init) @@ -1403,7 +1414,7 @@ void setParam(G1 *P, Fr *x, size_t n, cybozu::XorShift& rg) for (size_t i = 0; i < n; i++) { uint32_t v = rg.get32(); hashAndMapToG1(P[i], &v, sizeof(v)); - x[i].setByCSPRNG(rg); + if (x) x[i].setByCSPRNG(rg); } } @@ -1538,6 +1549,27 @@ CYBOZU_TEST_AUTO(op) #endif } +CYBOZU_TEST_AUTO(normalizeJacobiVec) +{ + const bool isProj = false; + const size_t n = 64; + G1 P[n], Q[n], R[n]; + EcM PP[n/8]; + cybozu::XorShift rg; + setParam(P, 0, n, rg); + P[n/2].clear(); + P[n/3].clear(); + mcl::ec::normalizeVec(Q, P, n); + for (size_t i = 0; i < n/8; i++) { + PP[i].setG1((mcl::msm::G1A*)&P[i*8], isProj); + } + normalizeJacobiVec(PP); + for (size_t i = 0; i < n/8; i++) { + PP[i].getG1((mcl::msm::G1A*)&R[i*8], isProj); + } + CYBOZU_TEST_EQUAL_ARRAY(P, R, n); +} + CYBOZU_TEST_AUTO(mulEach_special) { const size_t n = 8; @@ -1554,11 +1586,12 @@ CYBOZU_TEST_AUTO(mulEach_special) CYBOZU_TEST_AUTO(mulEach) { - const size_t n = 64; + const size_t n = 1024; G1 P[n], Q[n], R[n]; Fr x[n]; cybozu::XorShift rg; setParam(P, x, n, rg); + if (n > 32) P[32].clear(); P[n/2].clear(); for (size_t i = 0; i < n; i++) { Q[i] = P[i]; From 1562c62b4a0b2d412258742a283dad2b645e4f3e Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Tue, 14 May 2024 14:49:33 +0900 Subject: [PATCH 22/28] add edge case of x=2L --- src/msm_avx.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git 
a/src/msm_avx.cpp b/src/msm_avx.cpp index ad08b9f7..fe220635 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -1578,10 +1578,33 @@ CYBOZU_TEST_AUTO(mulEach_special) for (size_t i = 0; i < n; i++) P[i].clear(); P[0].setStr("1 13de196893df2bb5b57882ff1eec37d98966aa71b828fd25125d04ed2c75ddc55d5bc68bd797bd555f9a827387ee6b28 5d59257a0fccd5215cdeb0928296a7a4d684823db76aef279120d2d71c4b54604ec885eb554f99780231ade171979a3", 16); x[0].setStr("5b4b92c347ffcd8543904dd1b22a60d94b4a9c243046456b8befd41507bec5d", 16); +// x[0].setStr("457977620305299156129707153920788267006"); // L+L for (size_t i = 0; i < n; i++) Q[i] = P[i]; G1::mul(R[0], P[0], x[0]); G1::mulEach(Q, x, 8); CYBOZU_TEST_EQUAL(R[0], Q[0]); + mpz_class L; + L.setStr("0xac45a4010001a40200000000ffffffff"); + mpz_class tbl[] = { + 0, + 1, + L, + }; + cybozu::XorShift rg; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + const mpz_class& a = tbl[i]; + for (size_t j = 0; j < CYBOZU_NUM_OF_ARRAY(tbl); j++) { + const mpz_class& b = tbl[j]; + setParam(P, x, n, rg); + x[0].setMpz(a * L + b); + for (size_t k = 0; k < 8; k++) { + Q[k] = P[k]; + G1::mul(R[k], P[k], x[k]); + } + G1::mulEach(Q, x, n); + CYBOZU_TEST_EQUAL_ARRAY(R, Q, n); + } + } } CYBOZU_TEST_AUTO(mulEach) From 350848fcfbb3c68a5eef8f95670ade7c8a49ff5b Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Tue, 14 May 2024 15:10:57 +0900 Subject: [PATCH 23/28] add adj option to split --- include/mcl/ec.hpp | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp index f853919e..d3a6026d 100644 --- a/include/mcl/ec.hpp +++ b/include/mcl/ec.hpp @@ -243,7 +243,11 @@ void normalizeVecT(Eout& Q, Ein& P, size_t n, size_t N = 256) } #if MCL_SIZEOF_UNIT == 8 -inline void optimizedSplitRawForBLS12_381(Unit a[2], Unit b[2], const Unit x[4]) +/* + split x to (a, b) such that x = a + b L where 0 <= a, b <= L, 0 <= x <= r-1 = L^2+L + if adj is true, then 0 <= a < L, 0 <= b <= L+1 +*/ 
+inline void optimizedSplitRawForBLS12_381(Unit a[2], Unit b[2], const Unit x[4], bool adj = true) { assert(sizeof(Unit) == 8); /* @@ -251,25 +255,33 @@ inline void optimizedSplitRawForBLS12_381(Unit a[2], Unit b[2], const Unit x[4]) L = z^2-1 = 0xac45a4010001a40200000000ffffffff r = L^2+L+1 = 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001 s=255 - v = 0xbe35f678f00fd56eb1fb72917b67f718 + v = (1<>255 mcl::bint::shrT(t, t+n*2-1, mcl::UnitBitSize-1); // >>255 b[0] = t[0]; b[1] = t[1]; Unit t2[n*2]; - // t2[n*2] = t[n] * Lv[n] + // t2[n*2] = t[n] * L[n] // Do not overlap I/O buffers on pre-Broadwell CPUs. - mcl::bint::mulT(t2, t, Lv); + mcl::bint::mulT(t2, t, L); // a[n] = x[n*2] - t2[n*2] mcl::bint::subT(a, x, t2); + if (adj) { + if (mcl::bint::cmpEqT(a, L)) { + // if a == L then b = b + 1 and a = 0 + mcl::bint::addT(b, b, one); + mcl::bint::clearT(a); + } + } } #endif From b3f57130b59d591d0ca232105d94ba1eaaa350f8 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 15 May 2024 09:39:09 +0900 Subject: [PATCH 24/28] compute v2 before v1, then we can use mixed jacobi --- include/mcl/ec.hpp | 3 ++- src/msm_avx.cpp | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp index d3a6026d..028153ff 100644 --- a/include/mcl/ec.hpp +++ b/include/mcl/ec.hpp @@ -247,8 +247,9 @@ void normalizeVecT(Eout& Q, Ein& P, size_t n, size_t N = 256) split x to (a, b) such that x = a + b L where 0 <= a, b <= L, 0 <= x <= r-1 = L^2+L if adj is true, then 0 <= a < L, 0 <= b <= L+1 */ -inline void optimizedSplitRawForBLS12_381(Unit a[2], Unit b[2], const Unit x[4], bool adj = true) +inline void optimizedSplitRawForBLS12_381(Unit a[2], Unit b[2], const Unit x[4]) { + const bool adj = false; assert(sizeof(Unit) == 8); /* z = -0xd201000000010000 diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index fe220635..386caf47 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -1144,16 +1144,17 @@ struct EcM { if 
(!first) for (int k = 0; k < w; k++) EcM::dbl(Q, Q); EcM T; Vec idx; - idx = vand(vpsrlq(v1, bitLen-w-j*w), g_vmask4); + // compute v2 first before v1. see misc/internal.md + idx = vand(vpsrlq(v2, bitLen-w-j*w), g_vmask4); if (first) { - Q.gather(tbl1, idx); + Q.gather(tbl2, idx); first = false; } else { - T.gather(tbl1, idx); + T.gather(tbl2, idx); add(Q, Q, T); } - idx = vand(vpsrlq(v2, bitLen-w-j*w), g_vmask4); - T.gather(tbl2, idx); + idx = vand(vpsrlq(v1, bitLen-w-j*w), g_vmask4); + T.gather(tbl1, idx); add(Q, Q, T); } } From d1efd490682134b4719d6a2711f871466361658d Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 15 May 2024 11:31:34 +0900 Subject: [PATCH 25/28] [doc] update GLV algo for SIMD --- misc/internal.md | 54 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/misc/internal.md b/misc/internal.md index c83f684d..9f5d362d 100644 --- a/misc/internal.md +++ b/misc/internal.md @@ -45,33 +45,42 @@ bit_length|64|128|255|255|128 ### Split function ```python +adj = False def split(x): b = (x * v) >> s a = x - b * L + if adj: + if a >= L: + a -= L + b += 1 return (a, b) ``` +- x in [0, r-1] - a + b L = x for (a, b) = split(x). ### Theorem -0 <= a, b < H for all x in [0, M-r]. +0 <= a < 1.11 L < H and 0 <= b < L+1 for x in [0, r-1]. ### Proof ``` -Let r0 := L S % r, then S=v L + r0 and r0 in [0, L-1]. +Let r0 := L S % r, then S=v L + r0 and r0 in [0, L-1]. In fact, r0 ~ 0.11 L. Let r1 := x v % S, then x v = b S + r1 and r1 in [0, S-1]. ``` ``` -b <= xv / S < (M-r) (S/L)/S = (M-r)/L < H. +b <= xv / S < (r-1) (S/L)/S = (r-1)/L = L+1. ``` ``` aS = (x - bL)S = xS - bSL = xS - (xv - r1)L = x(S - vL) + r1 L = r0 x + r1 L - <= r0 (M-r) + (S-1)L < S H. + <= r0 (r-1) + (S-1)L = S L + (r-1)r0 - L. +a <= L + ((r-1)r0 - L)/S +((r-1)r0 - L)/S ~ 0.10016 L < 0.11 L. ``` -Then, a < H. -So for x in [0, M-1], set x = x - r if x >= H and apply split() to x. +### Remark +If adj is true, then a is in [0, L-1]. 
+ ## window size - 128-bit (Fr is 256 bit and use GLV method) @@ -88,14 +97,15 @@ f(w)|130|68|51|48|58|86 argmin f(w) = 4 -## Use projective coordinates +## Selection of coordinates -- psuedo code of GLV method +### pseudo code of GLV method ```python def mul(P, x): - (a, b) = split(x) - # a, b < 1<<128 + assert(0 <= x < r) + (a, b) = split(x) # x = a + b L + # a, b < H=1<<128 w = 4 for i in range(1<> (w*i)) & mask - j2 = (b >> (w*i)) & mask ### AAA - Q = add(Q, tbl1[j1]) - Q = add(Q, tbl2[j2]) + j2 = (b >> (w*i)) & mask + Q = add(Q, tbl2[j2]) # ADD1 + Q = add(Q, tbl1[j1]) # ADD2 return Q ``` -The values of tbl1[i] are 0, P, ..., 15P, and the values of tbl2[i] are 0, LP, ... , 15LP. -Since L is odd and Q is a multiple of 16 just before AAA, Q != tbl1[j1] and Q != tbl2[j2]. So we can omit the ehckd of x == y in add(x, y). +Note that the value of tbl2 is added first. + +### Theorem +We can use Jacobi additive formula add(P, Q) assuming P != Q and P+Q != 0. + +Proof. + +During the calculation, Q is monotonically increasing and always in [0, P, ..., (r-1)P]. + +- ADD1 : tbl2[] is in [0, L P, ..., 15 L P] and L is odd. +After computing AAA, Q is a multiple of 16 P, so Q != tbl2[j2]. +- ADD2 : tbl1[] is in [0, P, ..., 15 P]. +After computing ADD1, if the immediately preceding tbl2[j2] is 0, then Q is a multiple of 16 P, so Q != tbl1[j1]. +Otherwise, Q is bigger than L P, so Q != tbl1[j1]. ## Jacobi and Proj `sqr` is equal to `mul` on AVX-512. From 01d1a430780bb7c3b6b2f00a919d90181e17b7f8 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Mon, 13 May 2024 20:00:51 -0300 Subject: [PATCH 26/28] Make MCL_STATIC_LIB full static Currently MCL_STATIC_LIB will produce static mcl::mclbn{N} targets, but these static targets still link to the *non*-static mcl::mcl and so you end up with a half-static result that still depends on a dynamic lib. This fixes it to link to the mcl_st static target when MCL_STATIC_LIB is enabled.
--- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e7664c4f..6cd09109 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -283,8 +283,10 @@ endif() foreach(bit IN ITEMS 256 384 384_256) if (MCL_STATIC_LIB) add_library(mclbn${bit} STATIC src/bn_c${bit}.cpp) + target_link_libraries(mclbn${bit} PUBLIC mcl::mcl_st) else() add_library(mclbn${bit} SHARED src/bn_c${bit}.cpp) + target_link_libraries(mclbn${bit} PUBLIC mcl::mcl) endif() add_library(mcl::mclbn${bit} ALIAS mclbn${bit}) set_target_properties(mclbn${bit} PROPERTIES @@ -294,7 +296,6 @@ foreach(bit IN ITEMS 256 384 384_256) target_compile_options(mclbn${bit} PRIVATE ${MCL_COMPILE_OPTIONS}) target_compile_definitions(mclbn${bit} PUBLIC MCL_NO_AUTOLINK MCLBN_NO_AUTOLINK) - target_link_libraries(mclbn${bit} PUBLIC mcl::mcl) set_target_properties(mclbn${bit} PROPERTIES VERSION ${mcl_VERSION} SOVERSION ${mcl_VERSION_MAJOR}) From e8b22e5df1019360c1f34443594635162c39ff32 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 15 May 2024 12:07:59 +0900 Subject: [PATCH 27/28] [doc] add contributing.md --- .github/CONTRIBUTING.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .github/CONTRIBUTING.md diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 00000000..2f957d54 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,32 @@ +# Contributing to mcl + +Thank you for considering contributing to the mcl project. This document provides guidelines on how to contribute. + +## Bug Reports and Feedback + +If you find a bug, have a feature request, or have questions, please open an issue. Include the following information: + +- Detailed description of the problem +- Steps to reproduce +- Expected behavior +- Actual behavior +- Environment details (OS, compiler version, etc.) 
+ +## Creating Pull Requests + +If you want to add features or make fixes, follow these steps to create a pull request: + +1. Fork the repository +2. Create a new branch: `git checkout -b my-feature-branch` +3. Make your changes +4. Run tests and ensure all tests pass +5. Commit your changes: `git commit -am 'Add new feature'` +6. Push the branch: `git push origin my-feature-branch` +7. Create a pull request + +When creating a pull request, clearly describe the changes and include any related issue numbers. + +## License + +mcl is released under the BSD-3-Clause License. Any code contributions will be licensed under the same license. + From 5b91102c1547f271241ca06ef9d91ed26acdf8d4 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Fri, 17 May 2024 10:51:12 +0900 Subject: [PATCH 28/28] v1.92 --- include/mcl/op.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 9e544144..8e3e68c0 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -29,7 +29,7 @@ namespace mcl { -static const int version = 0x191; /* 0xABC = A.BC */ +static const int version = 0x192; /* 0xABC = A.BC */ /* specifies available string format mode for X::setIoMode()