From 8a0f4ec01173e8766a8173832a73ef3a930d2cbc Mon Sep 17 00:00:00 2001 From: Jack Lloyd Date: Sat, 13 Jul 2024 13:55:44 -0400 Subject: [PATCH] Support fast field inversion for P-256, P-384, and P-521 For P-256 this speeds up projective->affine conversion by about 5%, saving ~2K cycles. For P-384 it speeds up conversion by 25%, saving 8K cycles. For P-521 it speeds up conversion by 20%, saving 33K cycles. The savings are most pronounced with ECDSA signing (since it is the fastest operation), improving by 2%, 4%, and 2% resp. GH #4027 --- src/lib/math/pcurves/pcurves_impl.h | 14 ++++-- .../pcurves_secp256r1/pcurves_secp256r1.cpp | 44 ++++++++++++++++- .../pcurves_secp384r1/pcurves_secp384r1.cpp | 49 +++++++++++++++++-- .../pcurves_secp521r1/pcurves_secp521r1.cpp | 44 ++++++++++++++++- src/lib/math/pcurves/pcurves_wrap.h | 34 +++++++++++-- 5 files changed, 172 insertions(+), 13 deletions(-) diff --git a/src/lib/math/pcurves/pcurves_impl.h b/src/lib/math/pcurves/pcurves_impl.h index 7f0b4075325..2646f8ba387 100644 --- a/src/lib/math/pcurves/pcurves_impl.h +++ b/src/lib/math/pcurves/pcurves_impl.h @@ -176,6 +176,14 @@ class IntMod final { return Self(Rep::redc(z)); } + void square_n(size_t n) { + std::array z; + for(size_t i = 0; i != n; ++i) { + comba_sqr(z.data(), this->data()); + m_val = Rep::redc(z); + } + } + // Negation modulo p constexpr Self negate() const { auto x_is_zero = CT::all_zeros(this->data(), N); @@ -213,9 +221,7 @@ class IntMod final { } for(size_t i = 1; i != Windows; ++i) { - for(size_t j = 0; j != WindowBits; ++j) { - r = r.square(); - } + r.square_n(WindowBits); const size_t w = read_window_bits(std::span{exp}, (Windows - i - 1) * WindowBits); @@ -1362,7 +1368,7 @@ inline auto map_to_curve_sswu(const typename C::FieldElement& u) -> typename C:: x1.conditional_assign(tv1.is_zero(), C::SSWU_C2()); const auto gx1 = C::AffinePoint::x3_ax_b(x1); - const auto x2 = C::SSWU_Z * u.square() * x1; + const auto x2 = z_u2 * x1; const auto gx2 = C::AffinePoint::x3_ax_b(x2); // Will be zero if gx1 is not a square diff --git a/src/lib/math/pcurves/pcurves_secp256r1/pcurves_secp256r1.cpp b/src/lib/math/pcurves/pcurves_secp256r1/pcurves_secp256r1.cpp index 80303a3204f..3139835c5ad 100644 --- a/src/lib/math/pcurves/pcurves_secp256r1/pcurves_secp256r1.cpp +++ b/src/lib/math/pcurves/pcurves_secp256r1/pcurves_secp256r1.cpp @@ -130,11 +130,51 @@ class Params final : public EllipticCurveParameters< #if BOTAN_MP_WORD_BITS == 32 // Secp256r1Rep works for 64 bit also, but is at best marginally faster at least // on compilers/CPUs tested so far -class Curve final : public EllipticCurve {}; +typedef EllipticCurve Secp256r1Base; #else -class Curve final : public EllipticCurve {}; +typedef EllipticCurve Secp256r1Base; #endif +class Curve final : public Secp256r1Base { + public: + // Return the square of the inverse of x + static FieldElement fe_invert2(const FieldElement& x) { + FieldElement r = x.square(); + r *= x; + + const auto p2 = r; + r.square_n(2); + r *= p2; + const auto p4 = r; + r.square_n(4); + r *= p4; + const auto p8 = r; + r.square_n(8); + r *= p8; + const auto p16 = r; + r.square_n(16); + r *= p16; + const auto p32 = r; + r.square_n(32); + r *= x; + r.square_n(128); + r *= p32; + r.square_n(32); + r *= p32; + r.square_n(16); + r *= p16; + r.square_n(8); + r *= p8; + r.square_n(4); + r *= p4; + r.square_n(2); + r *= p2; + r.square_n(2); + + return r; + } +}; + } // namespace secp256r1 } // namespace diff --git a/src/lib/math/pcurves/pcurves_secp384r1/pcurves_secp384r1.cpp b/src/lib/math/pcurves/pcurves_secp384r1/pcurves_secp384r1.cpp index 24bf5fe2556..c8537bb1264 100644 --- a/src/lib/math/pcurves/pcurves_secp384r1/pcurves_secp384r1.cpp +++ b/src/lib/math/pcurves/pcurves_secp384r1/pcurves_secp384r1.cpp @@ -136,11 +136,54 @@ class Params final : public EllipticCurveParameters< -12> { }; -class Curve final : public EllipticCurve {}; +// clang-format on -} +class Curve final : public EllipticCurve { + public: + // Return the square of the inverse of x + static FieldElement fe_invert2(const FieldElement& x) { + // From https://briansmith.org/ecc-inversion-addition-chains-01 + + FieldElement r = x.square(); + r *= x; + const auto x2 = r; + r = r.square(); + r *= x; + const auto x3 = r; + r.square_n(3); + r *= x3; + auto rl = r; + r.square_n(6); + r *= rl; + r.square_n(3); + r *= x3; + const auto x15 = r; + r.square_n(15); + r *= x15; + const auto x30 = r; + r.square_n(30); + r *= x30; + rl = r; + r.square_n(60); + r *= rl; + rl = r; + r.square_n(120); + r *= rl; + r.square_n(15); + r *= x15; + r.square_n(31); + r *= x30; + r.square_n(2); + r *= x2; + r.square_n(94); + r *= x30; + r.square_n(2); -// clang-format on + return r; + } +}; + +} // namespace secp384r1 } // namespace diff --git a/src/lib/math/pcurves/pcurves_secp521r1/pcurves_secp521r1.cpp b/src/lib/math/pcurves/pcurves_secp521r1/pcurves_secp521r1.cpp index 293134d0d3a..220e5a2cbe4 100644 --- a/src/lib/math/pcurves/pcurves_secp521r1/pcurves_secp521r1.cpp +++ b/src/lib/math/pcurves/pcurves_secp521r1/pcurves_secp521r1.cpp @@ -66,7 +66,49 @@ class Params final : public EllipticCurveParameters< // clang-format on -class Curve final : public EllipticCurve {}; +class Curve final : public EllipticCurve { + public: + // Return the square of the inverse of x + static FieldElement fe_invert2(const FieldElement& x) { + // Addition chain from https://eprint.iacr.org/2014/852.pdf page 6 + + FieldElement r = x.square(); + r *= x; + r = r.square(); + r *= x; + FieldElement rl = r; + r.square_n(3); + r *= rl; + r.square_n(1); + r *= x; + const auto a7 = r; + r.square_n(1); + r *= x; + rl = r; + r.square_n(8); + r *= rl; + rl = r; + r.square_n(16); + r *= rl; + rl = r; + r.square_n(32); + r *= rl; + rl = r; + r.square_n(64); + r *= rl; + rl = r; + r.square_n(128); + r *= rl; + rl = r; + r.square_n(256); + r *= rl; + r.square_n(7); + r *= a7; + r.square_n(2); + + return r; + } +}; } // namespace secp521r1 diff --git a/src/lib/math/pcurves/pcurves_wrap.h b/src/lib/math/pcurves/pcurves_wrap.h index e0b8351b9d8..2bbe274ef91 100644 --- a/src/lib/math/pcurves/pcurves_wrap.h +++ b/src/lib/math/pcurves/pcurves_wrap.h @@ -12,6 +12,12 @@ namespace Botan::PCurve { +template +struct CurveHasFeInvert2 : std::false_type {}; + +template +struct CurveHasFeInvert2 : std::true_type {}; + template class PrimeOrderCurveImpl final : public PrimeOrderCurve { public: @@ -28,6 +34,28 @@ class PrimeOrderCurveImpl final : public PrimeOrderCurve { WindowedMul2Table m_table; }; + static C::AffinePoint curve_point_to_affine(const C::ProjectivePoint& pt) { + if constexpr(CurveHasFeInvert2::value) { + if(pt.is_identity().as_bool()) { + return C::AffinePoint::identity(); + } + + const auto z2_inv = C::fe_invert2(pt.z()); + const auto z3_inv = z2_inv.square() * pt.z(); + return typename C::AffinePoint(pt.x() * z2_inv, pt.y() * z3_inv); + } else { + return pt.to_affine(); + } + } + + static C::FieldElement curve_point_to_affine_x(const C::ProjectivePoint& pt) { + if constexpr(CurveHasFeInvert2::value) { + return pt.x() * C::fe_invert2(pt.z()); + } else { + return pt.to_affine().x(); + } + } + static_assert(C::OrderBits <= PrimeOrderCurve::MaximumBitLength); static_assert(C::PrimeFieldBits <= PrimeOrderCurve::MaximumBitLength); @@ -78,7 +106,7 @@ class PrimeOrderCurveImpl final : public PrimeOrderCurve { return {}; } std::array x_bytes; - pt.to_affine().x().serialize_to(std::span{x_bytes}); + curve_point_to_affine_x(pt).serialize_to(std::span{x_bytes}); return stash(C::Scalar::from_wide_bytes(std::span{x_bytes})); } catch(std::bad_cast&) { throw Invalid_Argument("Curve mismatch"); @@ -88,14 +116,14 @@ class PrimeOrderCurveImpl final : public PrimeOrderCurve { Scalar base_point_mul_x_mod_order(const Scalar& scalar, RandomNumberGenerator& rng) const override { auto pt = m_mul_by_g.mul(from_stash(scalar), rng); std::array x_bytes; - pt.to_affine().x().serialize_to(std::span{x_bytes}); + curve_point_to_affine_x(pt).serialize_to(std::span{x_bytes}); return stash(C::Scalar::from_wide_bytes(std::span{x_bytes})); } AffinePoint generator() const override { return stash(C::G); } AffinePoint point_to_affine(const ProjectivePoint& pt) const override { - return stash(from_stash(pt).to_affine()); + return stash(curve_point_to_affine(from_stash(pt))); } ProjectivePoint point_to_projective(const AffinePoint& pt) const override {