Merge branch 'dev'

herumi · Jun 24, 2024 · 4ab7835 · 4ab7835
2 parents 273c5d3 + b747059
commit 4ab7835
Show file tree

Hide file tree

Showing 12 changed files with 451 additions and 125 deletions.
diff --git a/Makefile b/Makefile
@@ -16,6 +16,7 @@ TEST_SRC+=ecdsa_test.cpp ecdsa_c_test.cpp
 TEST_SRC+=mul_test.cpp
 TEST_SRC+=bint_test.cpp
 TEST_SRC+=low_func_test.cpp
+TEST_SRC+=smallmodp_test.cpp
 ifneq ($(MCL_USE_GMP),1)
   TEST_SRC+=static_init_test.cpp
 endif

diff --git a/include/mcl/bint.hpp b/include/mcl/bint.hpp
@@ -8,7 +8,7 @@
 */
 
 #include <mcl/config.hpp>
-#include <cybozu/bit_operation.hpp>
+#include <mcl/util.hpp>
 #include <assert.h>
 #ifndef MCL_STANDALONE
 #include <stdio.h>
@@ -476,5 +476,133 @@ inline Unit getMontgomeryCoeff(Unit pLow, size_t bitSize = sizeof(Unit) * 8)
 	return pp;
 }
 
+struct SmallModP { // helper that reduces a value slightly larger than p modulo p, using a precomputed d-bit approximation p0_ of 2**(d+l_-1)/p
+	static const size_t d = 16; // number of bits used for the approximation; d = 26 if double is used in approx
+	static const size_t MAX_MUL_N = 1; // number of precomputed multiples of p kept in tbl_; the lookup is (per the original note) not used because mulSmallUnit is called first.
+	static const size_t maxE_ = d - 2; // largest accepted e = bitSize(x) - l_ + 1 in quot(); larger inputs are rejected
+	const Unit *p_; // modulus as passed to init(); only the pointer is stored, the array is not copied
+	Unit tbl_[MAX_MUL_N][MCL_MAX_UNIT_SIZE+1]; // tbl_[i][0, n_] = p * (i+1), filled by init()
+	size_t n_; // number of Units of p
+	size_t l_; // bit size of p (result of mcl::fp::getBitSize(p, n))
+	uint32_t p0_; // low 32 bits of the quotient 2**(d+l_-1) / p computed in init()
+	// Default constructor: zeroes n_, l_ and p0_; p_ and tbl_ are not initialized here, so call init() before use.
+	SmallModP()
+		: n_(0)
+		, l_(0)
+		, p0_(0)
+	{
+	}
+	// Store the modulus p[0, n) and precompute l_, p0_ and tbl_. p must not be temporary (only the pointer is kept in p_).
+	void init(const Unit *p, size_t n)
+	{
+		p_ = p;
+		n_ = n;
+		l_ = mcl::fp::getBitSize(p, n);
+		Unit *t = (Unit*)CYBOZU_ALLOCA((n_+1)*sizeof(Unit)); // scratch of n_+1 Units holding the dividend
+		mcl::bint::clearN(t, n_+1);
+		size_t pos = d + l_ - 1; // index of the single bit to set, i.e. t = 2**(d+l_-1)
+		{
+			size_t q = pos / MCL_UNIT_BIT_SIZE;
+			size_t r = pos % MCL_UNIT_BIT_SIZE;
+			t[q] = Unit(1) << r;
+		}
+		// p0 = 2**(d+l-1)/p
+		Unit q[2]; // quotient buffer of 2 Units; the upper Unit is asserted to be 0 below
+		mcl::bint::div(q, 2, t, n_+1, p, n_);
+		assert(q[1] == 0);
+		p0_ = uint32_t(q[0]); // narrowed to 32 bits; presumably the quotient is at most 2**d - TODO confirm
+		for (size_t i = 0; i < MAX_MUL_N; i++) {
+			tbl_[i][n_] = mcl::bint::mulUnitN(tbl_[i], p_, Unit(i+1), n_); // 1~MAX_MUL_N; the return value (presumably the top/carry Unit) is stored at index n_
+		}
+	}
+	Unit approx(Unit x0, size_t a) const // approximate quotient from x0 (the Unit read at bit a - d of x in quot) and a (bit size of x)
+	{
+//		uint64_t t = uint64_t(double(x0) * double(p0_)); // for d = 26
+		uint32_t t = uint32_t(x0 * p0_); // product is truncated to 32 bits; presumably x0 < 2**d so nothing is lost - TODO confirm
+		return Unit(t >> (2 * d + l_ - 1 - a)); // the right shift discards the low bits, so the result is truncated toward zero
+	}
+	// compute an approximate quotient *pQ of x[0, xn) by p, used for x[xn] %= p
+	// returns false (without writing *pQ) when x is too large, i.e. e > maxE_; original note: the effective range of return value is [0, n_)
+	bool quot(Unit *pQ, const Unit *x, size_t xn) const
+	{
+		size_t a = mcl::fp::getBitSize(x, xn);
+		if (a < l_) { // x has fewer bits than p, so the quotient is 0
+			*pQ = 0;
+			return true;
+		}
+		size_t e = a - l_ + 1;
+		if (e > maxE_) return false;
+		Unit x0 = mcl::fp::getUnitAt(x, xn, a - d); // NOTE(review): a - d wraps around if a < d; presumably l_ >= d always holds - confirm
+		*pQ = approx(x0, a);
+		return true;
+	}
+	// z[0, n_) = x[0, xn) reduced by p; return false if x[0, xn) is large (quot() fails), in which case z is not written
+	bool mod(Unit *z, const Unit *x, size_t xn) const
+	{
+		assert(xn <= n_ + 1);
+		Unit Q;
+		if (!quot(&Q, x, xn)) return false;
+		if (Q == 0) { // NOTE(review): Q == 0 is also reachable when bitSize(x) >= l_ (approx truncates); x may then be >= p and is copied unreduced - confirm
+			mcl::bint::copyN(z, x, n_); // copies n_ Units from x; assumes xn >= n_ - TODO confirm
+			return true;
+		}
+		Unit *t = (Unit*)CYBOZU_ALLOCA((n_+1)*sizeof(Unit)); // scratch of n_+1 Units for Q * p and the difference
+		const Unit *pQ = 0; // will point at Q * p, either in tbl_ or in t
+		if (Q <= MAX_MUL_N) {
+			assert(Q > 0);
+			pQ = tbl_[Q-1];
+		} else {
+			t[n_] = mcl::bint::mulUnitN(t, p_, Q, n_);
+			pQ = t;
+		}
+		bool b = mcl::bint::subN(t, x, pQ, xn); // t = x - Q * p over xn Units (in place when pQ == t); b is presumably the borrow
+		assert(!b); (void)b;
+		if (mcl::bint::cmpGeN(t, tbl_[0], xn)) { // tbl_[0] == p and tbl_[0][n_] == 0
+			mcl::bint::subN(z, t, p_, n_); // one more subtraction of p for the case where Q was one too small
+		} else {
+			mcl::bint::copyN(z, t, n_);
+		}
+		return true;
+	}
+#if 1
+	// fixed-size variant of mod() for N Units; return false if x[0, xn) is large
+	template<size_t N>
+	bool modT(Unit z[N], const Unit *x, size_t xn) const
+	{
+		assert(xn <= N + 1);
+		Unit Q;
+		if (!quot(&Q, x, xn)) return false;
+		if (Q == 0) { // NOTE(review): same concern as in mod(): Q == 0 with x >= p would be returned unreduced - confirm
+			mcl::bint::copyT<N>(z, x);
+			return true;
+		}
+		Unit t[N+1]; // scratch for Q * p and the difference
+		const Unit *pQ = 0;
+		if (Q <= MAX_MUL_N) {
+			pQ = tbl_[Q-1];
+		} else {
+			t[N] = mcl::bint::mulUnitT<N>(t, p_, Q);
+			pQ = t;
+		}
+		bool b = mcl::bint::subT<N+1>(t, x, pQ); // fixed width N+1: presumably reads x[N], so callers need xn == N+1 - TODO confirm
+		assert(!b); (void)b;
+		if (mcl::bint::cmpGeT<N+1>(t, tbl_[0])) { // compares against p stored in tbl_[0]; assumes N == n_ - TODO confirm
+			mcl::bint::subT<N>(z, t, p_);
+		} else {
+			mcl::bint::copyT<N>(z, t);
+		}
+		return true;
+	}
+#endif
+	template<size_t N> // z[N] = x[N] * y reduced through smp; returns what modT returns (false if the product is too large)
+	static bool mulUnit(const SmallModP& smp, Unit z[N], const Unit x[N], Unit y)
+	{
+		Unit xy[N+1]; // full product of N+1 Units
+		xy[N] = mulUnitT<N>(xy, x, y);
+		return smp.modT<N>(z, xy, N+1);
+//		return smp.mod(z, xy, N+1);
+	}
+};
+
 } } // mcl::bint
 
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
@@ -114,21 +114,6 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 		op_.fp_add(y, x, x, op_.p);
 	}
 #endif
-	static inline void mul9A(Unit *y, const Unit *x)
-	{
-		mulSmall(y, x, 9);
-//		op_.fp_mul9(y, x, op_.p);
-	}
-	static inline void mulSmall(Unit *z, const Unit *x, const uint32_t y)
-	{
-		assert(y <= op_.smallModp.maxMulN);
-		Unit xy[maxSize + 1];
-		op_.fp_mulUnitPre(xy, x, y);
-		int v = op_.smallModp.approxMul(xy);
-		const Unit *pv = op_.smallModp.getPmul(v);
-		op_.fp_subPre(z, xy, pv);
-		op_.fp_sub(z, z, op_.p, op_.p);
-	}
 public:
 	typedef FpT<tag, maxBitSize> BaseFp;
 	// return pointer to array v_[]
@@ -187,9 +172,6 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 		if (op_.fp_mul2A_ == 0) {
 			op_.fp_mul2A_ = mul2A;
 		}
-		if (op_.fp_mul9A_ == 0) {
-			op_.fp_mul9A_ = mul9A;
-		}
 #endif
 		*pb = true;
 	}
@@ -608,23 +590,18 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 	}
 	static void mul9(FpT& y, const FpT& x)
 	{
-#ifdef MCL_XBYAK_DIRECT_CALL
-		op_.fp_mul9A_(y.v_, x.v_);
-#else
-		mul9A(y.v_, x.v_);
-#endif
+		mulUnit(y, x, 9);
 	}
 	static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); }
 	static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); }
-	static inline void mulSmall(FpT& z, const FpT& x, const uint32_t y)
-	{
-		mulSmall(z.v_, x.v_, y);
-	}
 	static inline void mulUnit(FpT& z, const FpT& x, const Unit y)
 	{
-		if (mulSmallUnit(z, x, y)) return;
+		if (mcl::fp::mulSmallUnit(z, x, y)) return; // first attempt; a true return presumably means z is already computed
+		if (op_.mulSmallUnit(op_.smallModP, z.v_, x.v_, y)) return; // second attempt through the Op function pointer using smallModP; NOTE(review): Op initialization sets mulSmallUnit = 0, presumably it is assigned before this call - confirm
 		op_.fp_mulUnit(z.v_, x.v_, y, op_.p);
 	}
+	// alias of mulUnit
+	static inline void mulSmall(FpT& z, const FpT& x, const uint32_t y) { mulUnit(z, x, y); }
 	static inline void inv(FpT& y, const FpT& x)
 	{
 		assert(!x.isZero());

diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp
@@ -949,85 +949,6 @@ class SquareRoot {
 #endif
 };
 
-/*
-	x mod p for a small value x < (pMulTblN * p).
-*/
-struct SmallModp {
-	static const size_t unitBitSize = sizeof(Unit) * 8;
-	static const size_t maxTblSize = (MCL_MAX_BIT_SIZE + unitBitSize - 1) / unitBitSize + 1;
-	static const size_t maxMulN = 9;
-	static const size_t pMulTblN = maxMulN + 1;
-	uint32_t N_;
-	uint32_t shiftL_;
-	uint32_t shiftR_;
-	uint32_t maxIdx_;
-	// pMulTbl_[i] = (p * i) >> (pBitSize_ - 1)
-	Unit pMulTbl_[pMulTblN][maxTblSize];
-	// idxTbl_[x] = (x << (pBitSize_ - 1)) / p
-	uint8_t idxTbl_[pMulTblN * 2];
-	// return x >> (pBitSize_ - 1)
-	SmallModp()
-		: N_(0)
-		, shiftL_(0)
-		, shiftR_(0)
-		, maxIdx_(0)
-		, pMulTbl_()
-		, idxTbl_()
-	{
-	}
-	// return argmax { i : x > i * p }
-	uint32_t approxMul(const Unit *x) const
-	{
-		uint32_t top = getTop(x);
-		assert(top <= maxIdx_);
-		return idxTbl_[top];
-	}
-	const Unit *getPmul(size_t v) const
-	{
-		assert(v < pMulTblN);
-		return pMulTbl_[v];
-	}
-	uint32_t getTop(const Unit *x) const
-	{
-		if (shiftR_ == 0) return x[N_ - 1];
-		return (x[N_ - 1] >> shiftR_) | (x[N_] << shiftL_);
-	}
-	uint32_t cvtInt(const mpz_class& x) const
-	{
-		assert(mcl::gmp::getUnitSize(x) <= 1);
-		if (x == 0) {
-			return 0;
-		} else {
-			return uint32_t(mcl::gmp::getUnit(x)[0]);
-		}
-	}
-	void init(const mpz_class& p)
-	{
-		size_t pBitSize = mcl::gmp::getBitSize(p);
-		N_ = uint32_t((pBitSize + unitBitSize - 1) / unitBitSize);
-		shiftR_ = (pBitSize - 1) % unitBitSize;
-		shiftL_ = unitBitSize - shiftR_;
-		mpz_class t = 0;
-		for (size_t i = 0; i < pMulTblN; i++) {
-			bool b;
-			mcl::gmp::getArray(&b, pMulTbl_[i], maxTblSize, t);
-			assert(b);
-			(void)b;
-			if (i == pMulTblN - 1) {
-				maxIdx_ = getTop(pMulTbl_[i]);
-				assert(maxIdx_ < CYBOZU_NUM_OF_ARRAY(idxTbl_));
-				break;
-			}
-			t += p;
-		}
-
-		for (uint32_t i = 0; i <= maxIdx_; i++) {
-			idxTbl_[i] = cvtInt((mpz_class(int(i)) << (pBitSize - 1)) / p);
-		}
-	}
-};
-
-
 /*
 	Barrett Reduction
 	for non GMP version

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
@@ -29,7 +29,7 @@
 
 namespace mcl {
 
-static const int version = 0x193; /* 0xABC = A.BC */
+static const int version = 0x194; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
@@ -185,7 +185,8 @@ struct Op {
 	mcl::SquareRoot sq;
 	CYBOZU_ALIGN(8) char im[sizeof(mcl::inv::InvModT<maxUnitSize>)];
 	mcl::Modp modp;
-	mcl::SmallModp smallModp;
+//	mcl::SmallModp smallModp;
+	mcl::bint::SmallModP smallModP;
 	Unit half[maxUnitSize]; // (p + 1) / 2
 	Unit oneRep[maxUnitSize]; // 1(=inv R if Montgomery)
 	/*
@@ -210,7 +211,6 @@ struct Op {
 	void3u fp_mulA_;
 	void2u fp_sqrA_;
 	void2u fp_mul2A_;
-	void2u fp_mul9A_;
 	void3u fp2_addA_;
 	void3u fp2_subA_;
 	void2u fp2_negA_;
@@ -238,6 +238,7 @@ struct Op {
 	void3u fp_mul2;
 	void2uOp fp_invOp;
 	void2uIu fp_mulUnit; // fp_mulUnitPre
+	bool (*mulSmallUnit)(const mcl::bint::SmallModP&, Unit *z, const Unit *x, Unit y);
 
 	void3u fpDbl_mulPre;
 	void2u fpDbl_sqrPre;
@@ -300,7 +301,6 @@ struct Op {
 		fp_mulA_ = 0;
 		fp_sqrA_ = 0;
 		fp_mul2A_ = 0;
-		fp_mul9A_ = 0;
 		fp2_addA_ = 0;
 		fp2_subA_ = 0;
 		fp2_negA_ = 0;
@@ -328,6 +328,7 @@ struct Op {
 		fp_mul2 = 0;
 		fp_invOp = 0;
 		fp_mulUnit = 0;
+		mulSmallUnit = 0;
 
 		fpDbl_mulPre = 0;
 		fpDbl_sqrPre = 0;