From 0d4bee6083b246d1270dc8665da688101766c557 Mon Sep 17 00:00:00 2001
From: subtly <subtly>
Date: Fri, 24 Jul 2015 15:15:27 -0400
Subject: [PATCH] update secp256k1

---
 libdevcrypto/Common.cpp            |   14 +-
 secp256k1/CMakeLists.txt           |   28 +-
 secp256k1/ecdsa.h                  |   32 +-
 secp256k1/ecdsa_impl.h             |  263 +++++++
 secp256k1/eckey.h                  |   26 +
 secp256k1/eckey_impl.h             |  202 +++++
 secp256k1/ecmult.h                 |   28 +-
 secp256k1/ecmult_gen.h             |   43 ++
 secp256k1/ecmult_gen_impl.h        |  184 +++++
 secp256k1/ecmult_impl.h            |  317 ++++++++
 secp256k1/field.h                  |   94 ++-
 secp256k1/field_10x26.h            |   36 +-
 secp256k1/field_10x26_impl.h       | 1136 ++++++++++++++++++++++++++++
 secp256k1/field_5x52.h             |   36 +-
 secp256k1/field_5x52_asm_impl.h    |  502 ++++++++++++
 secp256k1/field_5x52_impl.h        |  454 +++++++++++
 secp256k1/field_5x52_int128_impl.h |  277 +++++++
 secp256k1/field_gmp.h              |   16 -
 secp256k1/field_impl.h             |  263 +++++++
 secp256k1/group.h                  |  119 +--
 secp256k1/group_impl.h             |  443 +++++++++++
 secp256k1/hash.h                   |   41 +
 secp256k1/hash_impl.h              |  293 +++++++
 secp256k1/impl/ecdsa.h             |  307 --------
 secp256k1/impl/ecmult.h            |  259 -------
 secp256k1/impl/field.h             |  173 -----
 secp256k1/impl/field_10x26.h       |  487 ------------
 secp256k1/impl/field_5x52.h        |  196 -----
 secp256k1/impl/field_5x52_asm.h    |   11 -
 secp256k1/impl/field_5x52_int128.h |  105 ---
 secp256k1/impl/field_gmp.h         |  155 ----
 secp256k1/impl/group.h             |  403 ----------
 secp256k1/impl/num.h               |   20 -
 secp256k1/impl/num_boost.h         |  212 ------
 secp256k1/impl/num_gmp.h           |  346 ---------
 secp256k1/impl/num_openssl.h       |  145 ----
 secp256k1/impl/util.h              |   45 --
 secp256k1/include/secp256k1.h      |  347 +++++++++
 secp256k1/libsecp256k1-config.h    |  134 ++++
 secp256k1/num.h                    |   87 +--
 secp256k1/num_boost.h              |   12 -
 secp256k1/num_gmp.h                |    8 +-
 secp256k1/num_gmp_impl.h           |  260 +++++++
 secp256k1/num_impl.h               |   24 +
 secp256k1/num_openssl.h            |   14 -
 secp256k1/scalar.h                 |   93 +++
 secp256k1/scalar_4x64.h            |   19 +
 secp256k1/scalar_4x64_impl.h       |  920 ++++++++++++++++++++++
 secp256k1/scalar_8x32.h            |   19 +
 secp256k1/scalar_8x32_impl.h       |  681 +++++++++++++++++
 secp256k1/scalar_impl.h            |  327 ++++++++
 secp256k1/secp256k1.c              |  562 +++++++++-----
 secp256k1/secp256k1.h              |  121 ---
 secp256k1/util.h                   |  105 ++-
 test/libdevcrypto/crypto.cpp       |    6 +-
 55 files changed, 7984 insertions(+), 3466 deletions(-)
 create mode 100644 secp256k1/ecdsa_impl.h
 create mode 100644 secp256k1/eckey.h
 create mode 100644 secp256k1/eckey_impl.h
 create mode 100644 secp256k1/ecmult_gen.h
 create mode 100644 secp256k1/ecmult_gen_impl.h
 create mode 100644 secp256k1/ecmult_impl.h
 create mode 100644 secp256k1/field_10x26_impl.h
 create mode 100644 secp256k1/field_5x52_asm_impl.h
 create mode 100644 secp256k1/field_5x52_impl.h
 create mode 100644 secp256k1/field_5x52_int128_impl.h
 delete mode 100644 secp256k1/field_gmp.h
 create mode 100644 secp256k1/field_impl.h
 create mode 100644 secp256k1/group_impl.h
 create mode 100644 secp256k1/hash.h
 create mode 100644 secp256k1/hash_impl.h
 delete mode 100644 secp256k1/impl/ecdsa.h
 delete mode 100644 secp256k1/impl/ecmult.h
 delete mode 100644 secp256k1/impl/field.h
 delete mode 100644 secp256k1/impl/field_10x26.h
 delete mode 100644 secp256k1/impl/field_5x52.h
 delete mode 100644 secp256k1/impl/field_5x52_asm.h
 delete mode 100644 secp256k1/impl/field_5x52_int128.h
 delete mode 100644 secp256k1/impl/field_gmp.h
 delete mode 100644 secp256k1/impl/group.h
 delete mode 100644 secp256k1/impl/num.h
 delete mode 100644 secp256k1/impl/num_boost.h
 delete mode 100644 secp256k1/impl/num_gmp.h
 delete mode 100644 secp256k1/impl/num_openssl.h
 delete mode 100644 secp256k1/impl/util.h
 create mode 100644 secp256k1/include/secp256k1.h
 create mode 100644 secp256k1/libsecp256k1-config.h
 delete mode 100644 secp256k1/num_boost.h
 create mode 100644 secp256k1/num_gmp_impl.h
 create mode 100644 secp256k1/num_impl.h
 delete mode 100644 secp256k1/num_openssl.h
 create mode 100644 secp256k1/scalar.h
 create mode 100644 secp256k1/scalar_4x64.h
 create mode 100644 secp256k1/scalar_4x64_impl.h
 create mode 100644 secp256k1/scalar_8x32.h
 create mode 100644 secp256k1/scalar_8x32_impl.h
 create mode 100644 secp256k1/scalar_impl.h
 delete mode 100644 secp256k1/secp256k1.h
diff --git a/libdevcrypto/Common.cpp b/libdevcrypto/Common.cpp
index 7cc16bd03..b94c8712f 100644
--- a/libdevcrypto/Common.cpp
+++ b/libdevcrypto/Common.cpp
@@ -32,7 +32,7 @@
 #include <libdevcore/FileSystem.h>
 #include <libdevcore/RLP.h>
 #if ETH_HAVE_SECP256K1
-#include <secp256k1/secp256k1.h>
+#include <secp256k1/include/secp256k1.h>
 #endif
 #include "AES.h"
 #include "CryptoPP.h"
@@ -44,8 +44,10 @@ using namespace dev::crypto;
 #ifdef ETH_HAVE_SECP256K1
 struct Secp256k1Context
 {
-	Secp256k1Context() { secp256k1_start(); }
-	~Secp256k1Context() { secp256k1_stop(); }
+	Secp256k1Context() { ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY); }
+	~Secp256k1Context() { secp256k1_context_destroy(ctx); }
+	secp256k1_context_t* ctx;
+	operator secp256k1_context_t const*() const { return ctx; }
 };
 static Secp256k1Context s_secp256k1;
 #endif
@@ -75,7 +77,7 @@ Public dev::toPublic(Secret const& _secret)
 #ifdef ETH_HAVE_SECP256K1
 	bytes o(65);
 	int pubkeylen;
-	if (!secp256k1_ecdsa_pubkey_create(o.data(), &pubkeylen, _secret.data(), false))
+	if (!secp256k1_ec_pubkey_create(s_secp256k1, o.data(), &pubkeylen, _secret.data(), false))
 		return Public();
 	return FixedHash<64>(o.data()+1, Public::ConstructFromPointer);
 #else
@@ -201,7 +203,7 @@ Public dev::recover(Signature const& _sig, h256 const& _message)
 #ifdef ETH_HAVE_SECP256K1
 	bytes o(65);
 	int pubkeylen;
-	if (!secp256k1_ecdsa_recover_compact(_message.data(), h256::size, _sig.data(), o.data(), &pubkeylen, false, _sig[64]))
+	if (!secp256k1_ecdsa_recover_compact(s_secp256k1, _message.data(), _sig.data(), o.data(), &pubkeylen, false, _sig[64]))
 		return Public();
 	ret = FixedHash<64>(o.data() + 1, Public::ConstructFromPointer);
 #else
@@ -217,7 +219,7 @@ Signature dev::sign(Secret const& _k, h256 const& _hash)
 #ifdef ETH_HAVE_SECP256K1
 	Signature s;
 	int v;
-	if (!secp256k1_ecdsa_sign_compact(_hash.data(), h256::size, s.data(), _k.data(), Nonce::get().data(), &v))
+	if (!secp256k1_ecdsa_sign_compact(s_secp256k1, _hash.data(), s.data(), _k.data(), NULL, NULL, &v))
 		return Signature();
 	s[64] = v;
 	return s;
diff --git a/secp256k1/CMakeLists.txt b/secp256k1/CMakeLists.txt
index d66144fec..de08715cc 100644
--- a/secp256k1/CMakeLists.txt
+++ b/secp256k1/CMakeLists.txt
@@ -7,34 +7,12 @@ if (${CMAKE_MAJOR_VERSION} GREATER 2)
 endif()
 set(CMAKE_AUTOMOC OFF)
 
-set(CMAKE_ASM_COMPILER "yasm")
-
 set(EXECUTABLE secp256k1)
 file(GLOB HEADERS "*.h") 
 
-if (APPLE OR UNIX)
-
-	add_library(${EXECUTABLE} ${EXECUTABLE}.c field_5x52_asm.asm)
-	set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -DUSE_FIELD_GMP -DUSE_NUM_GMP -DUSE_FIELD_INV_NUM")
-	target_link_libraries(${EXECUTABLE} ${GMP_LIBRARIES})
-elseif (CMAKE_COMPILER_IS_MINGW)
-
-	include_directories(${Boost_INCLUDE_DIRS})
-
-	add_library(${EXECUTABLE} ${EXECUTABLE}.c field_5x52_asm.asm)
-
-	set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -W -Wall -Wextra -Wcast-align -Wnested-externs -Wshadow -Wstrict-prototypes -Wno-unused-function -DUSE_FIELD_GMP -DUSE_NUM_GMP -DUSE_FIELD_INV_NUM")
-	target_link_libraries(${EXECUTABLE} ${GMP_LIBRARIES})
-else()
-
-	include_directories(${Boost_INCLUDE_DIRS})
-
-	add_library(${EXECUTABLE} ${EXECUTABLE}.c)
-	# /TP - compile project as cpp project
-	set_target_properties(${EXECUTABLE} PROPERTIES COMPILE_FLAGS "/TP /wd4244")
-	set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DUSE_NUM_BOOST -DUSE_FIELD_10X26 -DUSE_FIELD_INV_BUILTIN")
-	
-endif()
+add_library(${EXECUTABLE} ${EXECUTABLE}.c)
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DHAVE_CONFIG_H -g -O2 -W -std=c89 -pedantic -Wall -Wextra -Wcast-align -Wnested-externs -Wshadow -Wstrict-prototypes -Wno-unused-function -Wno-long-long -Wno-overlength-strings")
+target_link_libraries(${EXECUTABLE} ${GMP_LIBRARIES})	
 
 install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
 install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )
diff --git a/secp256k1/ecdsa.h b/secp256k1/ecdsa.h
index d9faaa3e8..4ef78e8af 100644
--- a/secp256k1/ecdsa.h
+++ b/secp256k1/ecdsa.h
@@ -1,28 +1,24 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
 
 #ifndef _SECP256K1_ECDSA_
 #define _SECP256K1_ECDSA_
 
-#include "num.h"
+#include "scalar.h"
+#include "group.h"
+#include "ecmult.h"
 
 typedef struct {
-    secp256k1_num_t r, s;
+    secp256k1_scalar_t r, s;
 } secp256k1_ecdsa_sig_t;
 
-void static secp256k1_ecdsa_sig_init(secp256k1_ecdsa_sig_t *r);
-void static secp256k1_ecdsa_sig_free(secp256k1_ecdsa_sig_t *r);
-
-int static secp256k1_ecdsa_pubkey_parse(secp256k1_ge_t *elem, const unsigned char *pub, int size);
-void static secp256k1_ecdsa_pubkey_serialize(secp256k1_ge_t *elem, unsigned char *pub, int *size, int compressed);
-int static secp256k1_ecdsa_sig_parse(secp256k1_ecdsa_sig_t *r, const unsigned char *sig, int size);
-int static secp256k1_ecdsa_sig_serialize(unsigned char *sig, int *size, const secp256k1_ecdsa_sig_t *a);
-int static secp256k1_ecdsa_sig_verify(const secp256k1_ecdsa_sig_t *sig, const secp256k1_ge_t *pubkey, const secp256k1_num_t *message);
-int static secp256k1_ecdsa_sig_sign(secp256k1_ecdsa_sig_t *sig, const secp256k1_num_t *seckey, const secp256k1_num_t *message, const secp256k1_num_t *nonce, int *recid);
-int static secp256k1_ecdsa_sig_recover(const secp256k1_ecdsa_sig_t *sig, secp256k1_ge_t *pubkey, const secp256k1_num_t *message, int recid);
-void static secp256k1_ecdsa_sig_set_rs(secp256k1_ecdsa_sig_t *sig, const secp256k1_num_t *r, const secp256k1_num_t *s);
-int static secp256k1_ecdsa_privkey_parse(secp256k1_num_t *key, const unsigned char *privkey, int privkeylen);
-int static secp256k1_ecdsa_privkey_serialize(unsigned char *privkey, int *privkeylen, const secp256k1_num_t *key, int compressed);
+static int secp256k1_ecdsa_sig_parse(secp256k1_ecdsa_sig_t *r, const unsigned char *sig, int size);
+static int secp256k1_ecdsa_sig_serialize(unsigned char *sig, int *size, const secp256k1_ecdsa_sig_t *a);
+static int secp256k1_ecdsa_sig_verify(const secp256k1_ecmult_context_t *ctx, const secp256k1_ecdsa_sig_t *sig, const secp256k1_ge_t *pubkey, const secp256k1_scalar_t *message);
+static int secp256k1_ecdsa_sig_sign(const secp256k1_ecmult_gen_context_t *ctx, secp256k1_ecdsa_sig_t *sig, const secp256k1_scalar_t *seckey, const secp256k1_scalar_t *message, const secp256k1_scalar_t *nonce, int *recid);
+static int secp256k1_ecdsa_sig_recover(const secp256k1_ecmult_context_t *ctx, const secp256k1_ecdsa_sig_t *sig, secp256k1_ge_t *pubkey, const secp256k1_scalar_t *message, int recid);
 
 #endif
diff --git a/secp256k1/ecdsa_impl.h b/secp256k1/ecdsa_impl.h
new file mode 100644
index 000000000..ed1d22818
--- /dev/null
+++ b/secp256k1/ecdsa_impl.h
@@ -0,0 +1,263 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+
+#ifndef _SECP256K1_ECDSA_IMPL_H_
+#define _SECP256K1_ECDSA_IMPL_H_
+
+#include "scalar.h"
+#include "field.h"
+#include "group.h"
+#include "ecmult.h"
+#include "ecmult_gen.h"
+#include "ecdsa.h"
+
+/** Group order for secp256k1 defined as 'n' in "Standards for Efficient Cryptography" (SEC2) 2.7.1
+ *  sage: for t in xrange(1023, -1, -1):
+ *     ..   p = 2**256 - 2**32 - t
+ *     ..   if p.is_prime():
+ *     ..     print '%x'%p
+ *     ..     break
+ *   'fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f'
+ *  sage: a = 0
+ *  sage: b = 7
+ *  sage: F = FiniteField (p)
+ *  sage: '%x' % (EllipticCurve ([F (a), F (b)]).order())
+ *   'fffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141'
+ */
+static const secp256k1_fe_t secp256k1_ecdsa_const_order_as_fe = SECP256K1_FE_CONST(
+    0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
+    0xBAAEDCE6UL, 0xAF48A03BUL, 0xBFD25E8CUL, 0xD0364141UL
+);
+
+/** Difference between field and order, values 'p' and 'n' defined in
+ *  "Standards for Efficient Cryptography" (SEC2) 2.7.1.
+ *  sage: p = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F
+ *  sage: a = 0
+ *  sage: b = 7
+ *  sage: F = FiniteField (p)
+ *  sage: '%x' % (p - EllipticCurve ([F (a), F (b)]).order())
+ *   '14551231950b75fc4402da1722fc9baee'
+ */
+static const secp256k1_fe_t secp256k1_ecdsa_const_p_minus_order = SECP256K1_FE_CONST(
+    0, 0, 0, 1, 0x45512319UL, 0x50B75FC4UL, 0x402DA172UL, 0x2FC9BAEEUL
+);
+
+static int secp256k1_ecdsa_sig_parse(secp256k1_ecdsa_sig_t *r, const unsigned char *sig, int size) {
+    unsigned char ra[32] = {0}, sa[32] = {0};
+    const unsigned char *rp;
+    const unsigned char *sp;
+    int lenr;
+    int lens;
+    int overflow;
+    if (sig[0] != 0x30) {
+        return 0;
+    }
+    lenr = sig[3];
+    if (5+lenr >= size) {
+        return 0;
+    }
+    lens = sig[lenr+5];
+    if (sig[1] != lenr+lens+4) {
+        return 0;
+    }
+    if (lenr+lens+6 > size) {
+        return 0;
+    }
+    if (sig[2] != 0x02) {
+        return 0;
+    }
+    if (lenr == 0) {
+        return 0;
+    }
+    if (sig[lenr+4] != 0x02) {
+        return 0;
+    }
+    if (lens == 0) {
+        return 0;
+    }
+    sp = sig + 6 + lenr;
+    while (lens > 0 && sp[0] == 0) {
+        lens--;
+        sp++;
+    }
+    if (lens > 32) {
+        return 0;
+    }
+    rp = sig + 4;
+    while (lenr > 0 && rp[0] == 0) {
+        lenr--;
+        rp++;
+    }
+    if (lenr > 32) {
+        return 0;
+    }
+    memcpy(ra + 32 - lenr, rp, lenr);
+    memcpy(sa + 32 - lens, sp, lens);
+    overflow = 0;
+    secp256k1_scalar_set_b32(&r->r, ra, &overflow);
+    if (overflow) {
+        return 0;
+    }
+    secp256k1_scalar_set_b32(&r->s, sa, &overflow);
+    if (overflow) {
+        return 0;
+    }
+    return 1;
+}
+
+static int secp256k1_ecdsa_sig_serialize(unsigned char *sig, int *size, const secp256k1_ecdsa_sig_t *a) {
+    unsigned char r[33] = {0}, s[33] = {0};
+    unsigned char *rp = r, *sp = s;
+    int lenR = 33, lenS = 33;
+    secp256k1_scalar_get_b32(&r[1], &a->r);
+    secp256k1_scalar_get_b32(&s[1], &a->s);
+    while (lenR > 1 && rp[0] == 0 && rp[1] < 0x80) { lenR--; rp++; }
+    while (lenS > 1 && sp[0] == 0 && sp[1] < 0x80) { lenS--; sp++; }
+    if (*size < 6+lenS+lenR) {
+        return 0;
+    }
+    *size = 6 + lenS + lenR;
+    sig[0] = 0x30;
+    sig[1] = 4 + lenS + lenR;
+    sig[2] = 0x02;
+    sig[3] = lenR;
+    memcpy(sig+4, rp, lenR);
+    sig[4+lenR] = 0x02;
+    sig[5+lenR] = lenS;
+    memcpy(sig+lenR+6, sp, lenS);
+    return 1;
+}
+
+static int secp256k1_ecdsa_sig_verify(const secp256k1_ecmult_context_t *ctx, const secp256k1_ecdsa_sig_t *sig, const secp256k1_ge_t *pubkey, const secp256k1_scalar_t *message) {
+    unsigned char c[32];
+    secp256k1_scalar_t sn, u1, u2;
+    secp256k1_fe_t xr;
+    secp256k1_gej_t pubkeyj;
+    secp256k1_gej_t pr;
+
+    if (secp256k1_scalar_is_zero(&sig->r) || secp256k1_scalar_is_zero(&sig->s)) {
+        return 0;
+    }
+
+    secp256k1_scalar_inverse_var(&sn, &sig->s);
+    secp256k1_scalar_mul(&u1, &sn, message);
+    secp256k1_scalar_mul(&u2, &sn, &sig->r);
+    secp256k1_gej_set_ge(&pubkeyj, pubkey);
+    secp256k1_ecmult(ctx, &pr, &pubkeyj, &u2, &u1);
+    if (secp256k1_gej_is_infinity(&pr)) {
+        return 0;
+    }
+    secp256k1_scalar_get_b32(c, &sig->r);
+    secp256k1_fe_set_b32(&xr, c);
+
+    /** We now have the recomputed R point in pr, and its claimed x coordinate (modulo n)
+     *  in xr. Naively, we would extract the x coordinate from pr (requiring an inversion modulo p),
+     *  compute the remainder modulo n, and compare it to xr. However:
+     *
+     *        xr == X(pr) mod n
+     *    <=> exists h. (xr + h * n < p && xr + h * n == X(pr))
+     *    [Since 2 * n > p, h can only be 0 or 1]
+     *    <=> (xr == X(pr)) || (xr + n < p && xr + n == X(pr))
+     *    [In Jacobian coordinates, X(pr) is pr.x / pr.z^2 mod p]
+     *    <=> (xr == pr.x / pr.z^2 mod p) || (xr + n < p && xr + n == pr.x / pr.z^2 mod p)
+     *    [Multiplying both sides of the equations by pr.z^2 mod p]
+     *    <=> (xr * pr.z^2 mod p == pr.x) || (xr + n < p && (xr + n) * pr.z^2 mod p == pr.x)
+     *
+     *  Thus, we can avoid the inversion, but we have to check both cases separately.
+     *  secp256k1_gej_eq_x_var implements the (xr * pr.z^2 mod p == pr.x) test.
+     */
+    if (secp256k1_gej_eq_x_var(&xr, &pr)) {
+        /* xr * pr.z^2 mod p == pr.x, so the signature is valid. */
+        return 1;
+    }
+    if (secp256k1_fe_cmp_var(&xr, &secp256k1_ecdsa_const_p_minus_order) >= 0) {
+        /* xr + n >= p, so we can skip testing the second case. */
+        return 0;
+    }
+    secp256k1_fe_add(&xr, &secp256k1_ecdsa_const_order_as_fe);
+    if (secp256k1_gej_eq_x_var(&xr, &pr)) {
+        /* (xr + n) * pr.z^2 mod p == pr.x, so the signature is valid. */
+        return 1;
+    }
+    return 0;
+}
+
+static int secp256k1_ecdsa_sig_recover(const secp256k1_ecmult_context_t *ctx, const secp256k1_ecdsa_sig_t *sig, secp256k1_ge_t *pubkey, const secp256k1_scalar_t *message, int recid) {
+    unsigned char brx[32];
+    secp256k1_fe_t fx;
+    secp256k1_ge_t x;
+    secp256k1_gej_t xj;
+    secp256k1_scalar_t rn, u1, u2;
+    secp256k1_gej_t qj;
+
+    if (secp256k1_scalar_is_zero(&sig->r) || secp256k1_scalar_is_zero(&sig->s)) {
+        return 0;
+    }
+
+    secp256k1_scalar_get_b32(brx, &sig->r);
+    VERIFY_CHECK(secp256k1_fe_set_b32(&fx, brx)); /* brx comes from a scalar, so is less than the order; certainly less than p */
+    if (recid & 2) {
+        if (secp256k1_fe_cmp_var(&fx, &secp256k1_ecdsa_const_p_minus_order) >= 0) {
+            return 0;
+        }
+        secp256k1_fe_add(&fx, &secp256k1_ecdsa_const_order_as_fe);
+    }
+    if (!secp256k1_ge_set_xo_var(&x, &fx, recid & 1)) {
+        return 0;
+    }
+    secp256k1_gej_set_ge(&xj, &x);
+    secp256k1_scalar_inverse_var(&rn, &sig->r);
+    secp256k1_scalar_mul(&u1, &rn, message);
+    secp256k1_scalar_negate(&u1, &u1);
+    secp256k1_scalar_mul(&u2, &rn, &sig->s);
+    secp256k1_ecmult(ctx, &qj, &xj, &u2, &u1);
+    secp256k1_ge_set_gej_var(pubkey, &qj);
+    return !secp256k1_gej_is_infinity(&qj);
+}
+
+static int secp256k1_ecdsa_sig_sign(const secp256k1_ecmult_gen_context_t *ctx, secp256k1_ecdsa_sig_t *sig, const secp256k1_scalar_t *seckey, const secp256k1_scalar_t *message, const secp256k1_scalar_t *nonce, int *recid) {
+    unsigned char b[32];
+    secp256k1_gej_t rp;
+    secp256k1_ge_t r;
+    secp256k1_scalar_t n;
+    int overflow = 0;
+
+    secp256k1_ecmult_gen(ctx, &rp, nonce);
+    secp256k1_ge_set_gej(&r, &rp);
+    secp256k1_fe_normalize(&r.x);
+    secp256k1_fe_normalize(&r.y);
+    secp256k1_fe_get_b32(b, &r.x);
+    secp256k1_scalar_set_b32(&sig->r, b, &overflow);
+    if (secp256k1_scalar_is_zero(&sig->r)) {
+        /* P.x = order is on the curve, so technically sig->r could end up zero, which would be an invalid signature. */
+        secp256k1_gej_clear(&rp);
+        secp256k1_ge_clear(&r);
+        return 0;
+    }
+    if (recid) {
+        *recid = (overflow ? 2 : 0) | (secp256k1_fe_is_odd(&r.y) ? 1 : 0);
+    }
+    secp256k1_scalar_mul(&n, &sig->r, seckey);
+    secp256k1_scalar_add(&n, &n, message);
+    secp256k1_scalar_inverse(&sig->s, nonce);
+    secp256k1_scalar_mul(&sig->s, &sig->s, &n);
+    secp256k1_scalar_clear(&n);
+    secp256k1_gej_clear(&rp);
+    secp256k1_ge_clear(&r);
+    if (secp256k1_scalar_is_zero(&sig->s)) {
+        return 0;
+    }
+    if (secp256k1_scalar_is_high(&sig->s)) {
+        secp256k1_scalar_negate(&sig->s, &sig->s);
+        if (recid) {
+            *recid ^= 1;
+        }
+    }
+    return 1;
+}
+
+#endif
diff --git a/secp256k1/eckey.h b/secp256k1/eckey.h
new file mode 100644
index 000000000..53b818485
--- /dev/null
+++ b/secp256k1/eckey.h
@@ -0,0 +1,26 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECKEY_
+#define _SECP256K1_ECKEY_
+
+#include "group.h"
+#include "scalar.h"
+#include "ecmult.h"
+#include "ecmult_gen.h"
+
+static int secp256k1_eckey_pubkey_parse(secp256k1_ge_t *elem, const unsigned char *pub, int size);
+static int secp256k1_eckey_pubkey_serialize(secp256k1_ge_t *elem, unsigned char *pub, int *size, int compressed);
+
+static int secp256k1_eckey_privkey_parse(secp256k1_scalar_t *key, const unsigned char *privkey, int privkeylen);
+static int secp256k1_eckey_privkey_serialize(const secp256k1_ecmult_gen_context_t *ctx, unsigned char *privkey, int *privkeylen, const secp256k1_scalar_t *key, int compressed);
+
+static int secp256k1_eckey_privkey_tweak_add(secp256k1_scalar_t *key, const secp256k1_scalar_t *tweak);
+static int secp256k1_eckey_pubkey_tweak_add(const secp256k1_ecmult_context_t *ctx, secp256k1_ge_t *key, const secp256k1_scalar_t *tweak);
+static int secp256k1_eckey_privkey_tweak_mul(secp256k1_scalar_t *key, const secp256k1_scalar_t *tweak);
+static int secp256k1_eckey_pubkey_tweak_mul(const secp256k1_ecmult_context_t *ctx, secp256k1_ge_t *key, const secp256k1_scalar_t *tweak);
+
+#endif
diff --git a/secp256k1/eckey_impl.h b/secp256k1/eckey_impl.h
new file mode 100644
index 000000000..a332bd34e
--- /dev/null
+++ b/secp256k1/eckey_impl.h
@@ -0,0 +1,202 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECKEY_IMPL_H_
+#define _SECP256K1_ECKEY_IMPL_H_
+
+#include "eckey.h"
+
+#include "scalar.h"
+#include "field.h"
+#include "group.h"
+#include "ecmult_gen.h"
+
+static int secp256k1_eckey_pubkey_parse(secp256k1_ge_t *elem, const unsigned char *pub, int size) {
+    if (size == 33 && (pub[0] == 0x02 || pub[0] == 0x03)) {
+        secp256k1_fe_t x;
+        return secp256k1_fe_set_b32(&x, pub+1) && secp256k1_ge_set_xo_var(elem, &x, pub[0] == 0x03);
+    } else if (size == 65 && (pub[0] == 0x04 || pub[0] == 0x06 || pub[0] == 0x07)) {
+        secp256k1_fe_t x, y;
+        if (!secp256k1_fe_set_b32(&x, pub+1) || !secp256k1_fe_set_b32(&y, pub+33)) {
+            return 0;
+        }
+        secp256k1_ge_set_xy(elem, &x, &y);
+        if ((pub[0] == 0x06 || pub[0] == 0x07) && secp256k1_fe_is_odd(&y) != (pub[0] == 0x07)) {
+            return 0;
+        }
+        return secp256k1_ge_is_valid_var(elem);
+    } else {
+        return 0;
+    }
+}
+
+static int secp256k1_eckey_pubkey_serialize(secp256k1_ge_t *elem, unsigned char *pub, int *size, int compressed) {
+    if (secp256k1_ge_is_infinity(elem)) {
+        return 0;
+    }
+    secp256k1_fe_normalize_var(&elem->x);
+    secp256k1_fe_normalize_var(&elem->y);
+    secp256k1_fe_get_b32(&pub[1], &elem->x);
+    if (compressed) {
+        *size = 33;
+        pub[0] = 0x02 | (secp256k1_fe_is_odd(&elem->y) ? 0x01 : 0x00);
+    } else {
+        *size = 65;
+        pub[0] = 0x04;
+        secp256k1_fe_get_b32(&pub[33], &elem->y);
+    }
+    return 1;
+}
+
+static int secp256k1_eckey_privkey_parse(secp256k1_scalar_t *key, const unsigned char *privkey, int privkeylen) {
+    unsigned char c[32] = {0};
+    const unsigned char *end = privkey + privkeylen;
+    int lenb = 0;
+    int len = 0;
+    int overflow = 0;
+    /* sequence header */
+    if (end < privkey+1 || *privkey != 0x30) {
+        return 0;
+    }
+    privkey++;
+    /* sequence length constructor */
+    if (end < privkey+1 || !(*privkey & 0x80)) {
+        return 0;
+    }
+    lenb = *privkey & ~0x80; privkey++;
+    if (lenb < 1 || lenb > 2) {
+        return 0;
+    }
+    if (end < privkey+lenb) {
+        return 0;
+    }
+    /* sequence length */
+    len = privkey[lenb-1] | (lenb > 1 ? privkey[lenb-2] << 8 : 0);
+    privkey += lenb;
+    if (end < privkey+len) {
+        return 0;
+    }
+    /* sequence element 0: version number (=1) */
+    if (end < privkey+3 || privkey[0] != 0x02 || privkey[1] != 0x01 || privkey[2] != 0x01) {
+        return 0;
+    }
+    privkey += 3;
+    /* sequence element 1: octet string, up to 32 bytes */
+    if (end < privkey+2 || privkey[0] != 0x04 || privkey[1] > 0x20 || end < privkey+2+privkey[1]) {
+        return 0;
+    }
+    memcpy(c + 32 - privkey[1], privkey + 2, privkey[1]);
+    secp256k1_scalar_set_b32(key, c, &overflow);
+    memset(c, 0, 32);
+    return !overflow;
+}
+
+static int secp256k1_eckey_privkey_serialize(const secp256k1_ecmult_gen_context_t *ctx, unsigned char *privkey, int *privkeylen, const secp256k1_scalar_t *key, int compressed) {
+    secp256k1_gej_t rp;
+    secp256k1_ge_t r;
+    int pubkeylen = 0;
+    secp256k1_ecmult_gen(ctx, &rp, key);
+    secp256k1_ge_set_gej(&r, &rp);
+    if (compressed) {
+        static const unsigned char begin[] = {
+            0x30,0x81,0xD3,0x02,0x01,0x01,0x04,0x20
+        };
+        static const unsigned char middle[] = {
+            0xA0,0x81,0x85,0x30,0x81,0x82,0x02,0x01,0x01,0x30,0x2C,0x06,0x07,0x2A,0x86,0x48,
+            0xCE,0x3D,0x01,0x01,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+            0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+            0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F,0x30,0x06,0x04,0x01,0x00,0x04,0x01,0x07,0x04,
+            0x21,0x02,0x79,0xBE,0x66,0x7E,0xF9,0xDC,0xBB,0xAC,0x55,0xA0,0x62,0x95,0xCE,0x87,
+            0x0B,0x07,0x02,0x9B,0xFC,0xDB,0x2D,0xCE,0x28,0xD9,0x59,0xF2,0x81,0x5B,0x16,0xF8,
+            0x17,0x98,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+            0xFF,0xFF,0xFF,0xFF,0xFE,0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,0xBF,0xD2,0x5E,
+            0x8C,0xD0,0x36,0x41,0x41,0x02,0x01,0x01,0xA1,0x24,0x03,0x22,0x00
+        };
+        unsigned char *ptr = privkey;
+        memcpy(ptr, begin, sizeof(begin)); ptr += sizeof(begin);
+        secp256k1_scalar_get_b32(ptr, key); ptr += 32;
+        memcpy(ptr, middle, sizeof(middle)); ptr += sizeof(middle);
+        if (!secp256k1_eckey_pubkey_serialize(&r, ptr, &pubkeylen, 1)) {
+            return 0;
+        }
+        ptr += pubkeylen;
+        *privkeylen = ptr - privkey;
+    } else {
+        static const unsigned char begin[] = {
+            0x30,0x82,0x01,0x13,0x02,0x01,0x01,0x04,0x20
+        };
+        static const unsigned char middle[] = {
+            0xA0,0x81,0xA5,0x30,0x81,0xA2,0x02,0x01,0x01,0x30,0x2C,0x06,0x07,0x2A,0x86,0x48,
+            0xCE,0x3D,0x01,0x01,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+            0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+            0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F,0x30,0x06,0x04,0x01,0x00,0x04,0x01,0x07,0x04,
+            0x41,0x04,0x79,0xBE,0x66,0x7E,0xF9,0xDC,0xBB,0xAC,0x55,0xA0,0x62,0x95,0xCE,0x87,
+            0x0B,0x07,0x02,0x9B,0xFC,0xDB,0x2D,0xCE,0x28,0xD9,0x59,0xF2,0x81,0x5B,0x16,0xF8,
+            0x17,0x98,0x48,0x3A,0xDA,0x77,0x26,0xA3,0xC4,0x65,0x5D,0xA4,0xFB,0xFC,0x0E,0x11,
+            0x08,0xA8,0xFD,0x17,0xB4,0x48,0xA6,0x85,0x54,0x19,0x9C,0x47,0xD0,0x8F,0xFB,0x10,
+            0xD4,0xB8,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+            0xFF,0xFF,0xFF,0xFF,0xFE,0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,0xBF,0xD2,0x5E,
+            0x8C,0xD0,0x36,0x41,0x41,0x02,0x01,0x01,0xA1,0x44,0x03,0x42,0x00
+        };
+        unsigned char *ptr = privkey;
+        memcpy(ptr, begin, sizeof(begin)); ptr += sizeof(begin);
+        secp256k1_scalar_get_b32(ptr, key); ptr += 32;
+        memcpy(ptr, middle, sizeof(middle)); ptr += sizeof(middle);
+        if (!secp256k1_eckey_pubkey_serialize(&r, ptr, &pubkeylen, 0)) {
+            return 0;
+        }
+        ptr += pubkeylen;
+        *privkeylen = ptr - privkey;
+    }
+    return 1;
+}
+
+static int secp256k1_eckey_privkey_tweak_add(secp256k1_scalar_t *key, const secp256k1_scalar_t *tweak) {
+    secp256k1_scalar_add(key, key, tweak);
+    if (secp256k1_scalar_is_zero(key)) {
+        return 0;
+    }
+    return 1;
+}
+
+static int secp256k1_eckey_pubkey_tweak_add(const secp256k1_ecmult_context_t *ctx, secp256k1_ge_t *key, const secp256k1_scalar_t *tweak) {
+    secp256k1_gej_t pt;
+    secp256k1_scalar_t one;
+    secp256k1_gej_set_ge(&pt, key);
+    secp256k1_scalar_set_int(&one, 1);
+    secp256k1_ecmult(ctx, &pt, &pt, &one, tweak);
+
+    if (secp256k1_gej_is_infinity(&pt)) {
+        return 0;
+    }
+    secp256k1_ge_set_gej(key, &pt);
+    return 1;
+}
+
+static int secp256k1_eckey_privkey_tweak_mul(secp256k1_scalar_t *key, const secp256k1_scalar_t *tweak) {
+    if (secp256k1_scalar_is_zero(tweak)) {
+        return 0;
+    }
+
+    secp256k1_scalar_mul(key, key, tweak);
+    return 1;
+}
+
+static int secp256k1_eckey_pubkey_tweak_mul(const secp256k1_ecmult_context_t *ctx, secp256k1_ge_t *key, const secp256k1_scalar_t *tweak) {
+    secp256k1_scalar_t zero;
+    secp256k1_gej_t pt;
+    if (secp256k1_scalar_is_zero(tweak)) {
+        return 0;
+    }
+
+    secp256k1_scalar_set_int(&zero, 0);
+    secp256k1_gej_set_ge(&pt, key);
+    secp256k1_ecmult(ctx, &pt, &pt, tweak, &zero);
+    secp256k1_ge_set_gej(key, &pt);
+    return 1;
+}
+
+#endif
diff --git a/secp256k1/ecmult.h b/secp256k1/ecmult.h
index 856bd284f..bab9e4ef5 100644
--- a/secp256k1/ecmult.h
+++ b/secp256k1/ecmult.h
@@ -1,6 +1,8 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
 
 #ifndef _SECP256K1_ECMULT_
 #define _SECP256K1_ECMULT_
@@ -8,12 +10,22 @@
 #include "num.h"
 #include "group.h"
 
-static void secp256k1_ecmult_start(void);
-static void secp256k1_ecmult_stop(void);
+typedef struct { /* Precomputed generator tables consumed by secp256k1_ecmult. */
+    /* For accelerating the computation of a*P + b*G: */
+    secp256k1_ge_storage_t (*pre_g)[];    /* odd multiples of the generator; NULL until the context is built */
+#ifdef USE_ENDOMORPHISM
+    secp256k1_ge_storage_t (*pre_g_128)[]; /* odd multiples of 2^128*generator; NULL until the context is built */
+#endif
+} secp256k1_ecmult_context_t;
+
+static void secp256k1_ecmult_context_init(secp256k1_ecmult_context_t *ctx);
+static void secp256k1_ecmult_context_build(secp256k1_ecmult_context_t *ctx);
+static void secp256k1_ecmult_context_clone(secp256k1_ecmult_context_t *dst,
+                                           const secp256k1_ecmult_context_t *src);
+static void secp256k1_ecmult_context_clear(secp256k1_ecmult_context_t *ctx);
+static int secp256k1_ecmult_context_is_built(const secp256k1_ecmult_context_t *ctx);
 
-/** Multiply with the generator: R = a*G */
-static void secp256k1_ecmult_gen(secp256k1_gej_t *r, const secp256k1_num_t *a);
 /** Double multiply: R = na*A + ng*G */
-static void secp256k1_ecmult(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_num_t *na, const secp256k1_num_t *ng);
+static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_scalar_t *na, const secp256k1_scalar_t *ng);
 
 #endif
diff --git a/secp256k1/ecmult_gen.h b/secp256k1/ecmult_gen.h
new file mode 100644
index 000000000..3745633c4
--- /dev/null
+++ b/secp256k1/ecmult_gen.h
@@ -0,0 +1,43 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECMULT_GEN_
+#define _SECP256K1_ECMULT_GEN_
+
+#include "scalar.h"
+#include "group.h"
+
+typedef struct {
+    /* For accelerating the computation of a*G:
+     * To harden against timing attacks, use the following mechanism:
+     * * Break up the multiplicand into groups of 4 bits, called n_0, n_1, n_2, ..., n_63.
+     * * Compute sum(n_i * 16^i * G + U_i, i=0..63), where:
+     *   * U_i = U * 2^i (for i=0..62)
+     *   * U_i = U * (1-2^63) (for i=63)
+     *   where U is a point with no known corresponding scalar. Note that sum(U_i, i=0..63) = 0.
+     * For each i, and each of the 16 possible values of n_i, (n_i * 16^i * G + U_i) is
+     * precomputed (call it prec(i, n_i)). The formula now becomes sum(prec(i, n_i), i=0..63).
+     * None of the resulting prec group elements have a known scalar, and neither do any of
+     * the intermediate sums while computing a*G.
+     */
+    secp256k1_ge_storage_t (*prec)[64][16]; /* prec[j][i] = 16^j * i * G + U_j; NULL until the context is built */
+    secp256k1_scalar_t blind; /* scalar added to the multiplicand in secp256k1_ecmult_gen; set to -b by secp256k1_ecmult_gen_blind */
+    secp256k1_gej_t initial; /* starting value of the accumulator in secp256k1_ecmult_gen; set to b*G by secp256k1_ecmult_gen_blind */
+} secp256k1_ecmult_gen_context_t;
+
+static void secp256k1_ecmult_gen_context_init(secp256k1_ecmult_gen_context_t* ctx);
+static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context_t* ctx);
+static void secp256k1_ecmult_gen_context_clone(secp256k1_ecmult_gen_context_t *dst,
+                                               const secp256k1_ecmult_gen_context_t* src);
+static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context_t* ctx);
+static int secp256k1_ecmult_gen_context_is_built(const secp256k1_ecmult_gen_context_t* ctx);
+
+/** Multiply with the generator: R = a*G */
+static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context_t* ctx, secp256k1_gej_t *r, const secp256k1_scalar_t *a);
+
+static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context_t *ctx, const unsigned char *seed32);
+
+#endif
diff --git a/secp256k1/ecmult_gen_impl.h b/secp256k1/ecmult_gen_impl.h
new file mode 100644
index 000000000..4697753ac
--- /dev/null
+++ b/secp256k1/ecmult_gen_impl.h
@@ -0,0 +1,184 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014, 2015 Pieter Wuille, Gregory Maxwell      *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECMULT_GEN_IMPL_H_
+#define _SECP256K1_ECMULT_GEN_IMPL_H_
+
+#include "scalar.h"
+#include "group.h"
+#include "ecmult_gen.h"
+#include "hash_impl.h"
+
+static void secp256k1_ecmult_gen_context_init(secp256k1_ecmult_gen_context_t *ctx) { /* Put ctx into the "not built" state; blind and initial are not touched. */
+    ctx->prec = NULL; /* a NULL table is what secp256k1_ecmult_gen_context_is_built tests for */
+}
+
+static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context_t *ctx) { /* Allocate and fill the 64x16 table used by secp256k1_ecmult_gen, then install the default blinding. */
+    secp256k1_ge_t prec[1024];
+    secp256k1_gej_t gj;
+    secp256k1_gej_t nums_gej;
+    int i, j;
+    /* Building is idempotent: an already built context is left as it is. */
+    if (ctx->prec != NULL) {
+        return;
+    }
+
+    ctx->prec = (secp256k1_ge_storage_t (*)[64][16])checked_malloc(sizeof(*ctx->prec));
+
+    /* get the generator */
+    secp256k1_gej_set_ge(&gj, &secp256k1_ge_const_g);
+
+    /* Construct a group element with no known corresponding scalar (nothing up my sleeve). */
+    {
+        static const unsigned char nums_b32[33] = "The scalar for this x is unknown";
+        secp256k1_fe_t nums_x;
+        secp256k1_ge_t nums_ge;
+        VERIFY_CHECK(secp256k1_fe_set_b32(&nums_x, nums_b32)); /* NOTE(review): relies on VERIFY_CHECK still evaluating its argument in non-VERIFY builds - confirm in util.h */
+        VERIFY_CHECK(secp256k1_ge_set_xo_var(&nums_ge, &nums_x, 0)); /* same assumption: nums_ge is only initialised if the call is actually made */
+        secp256k1_gej_set_ge(&nums_gej, &nums_ge);
+        /* Add G to make the bits in x uniformly distributed. */
+        secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, &secp256k1_ge_const_g);
+    }
+
+    /* compute prec. */
+    {
+        secp256k1_gej_t precj[1024]; /* Jacobian versions of prec. */
+        secp256k1_gej_t gbase;
+        secp256k1_gej_t numsbase;
+        gbase = gj; /* 16^j * G */
+        numsbase = nums_gej; /* 2^j * nums. */
+        for (j = 0; j < 64; j++) {
+            /* Set precj[j*16 .. j*16+15] to (numsbase, numsbase + gbase, ..., numsbase + 15*gbase). */
+            precj[j*16] = numsbase;
+            for (i = 1; i < 16; i++) {
+                secp256k1_gej_add_var(&precj[j*16 + i], &precj[j*16 + i - 1], &gbase);
+            }
+            /* Multiply gbase by 16. */
+            for (i = 0; i < 4; i++) {
+                secp256k1_gej_double_var(&gbase, &gbase);
+            }
+            /* Multiply numsbase by 2. */
+            secp256k1_gej_double_var(&numsbase, &numsbase);
+            if (j == 62) {
+                /* For the last window (j == 63) numsbase is (1 - 2^63) * nums instead, so the 64 offsets sum to zero. */
+                secp256k1_gej_neg(&numsbase, &numsbase);
+                secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej);
+            }
+        }
+        secp256k1_ge_set_all_gej_var(1024, prec, precj); /* convert all 1024 entries from precj into prec in one call */
+    }
+    for (j = 0; j < 64; j++) {
+        for (i = 0; i < 16; i++) {
+            secp256k1_ge_to_storage(&(*ctx->prec)[j][i], &prec[j*16 + i]);
+        }
+    }
+    secp256k1_ecmult_gen_blind(ctx, NULL); /* NULL seed: reset initial/blind before deriving the blinding values */
+}
+
+static int secp256k1_ecmult_gen_context_is_built(const secp256k1_ecmult_gen_context_t* ctx) { /* Returns non-zero iff the precomputed table has been allocated. */
+    return ctx->prec != NULL;
+}
+
+static void secp256k1_ecmult_gen_context_clone(secp256k1_ecmult_gen_context_t *dst, /* Deep-copy src into dst; dst->prec is overwritten without being freed. */
+                                               const secp256k1_ecmult_gen_context_t *src) {
+    if (src->prec == NULL) {
+        dst->prec = NULL; /* unbuilt source: dst->initial and dst->blind are left untouched */
+    } else {
+        dst->prec = (secp256k1_ge_storage_t (*)[64][16])checked_malloc(sizeof(*dst->prec));
+        memcpy(dst->prec, src->prec, sizeof(*dst->prec));
+        dst->initial = src->initial; /* the clone shares the source's current blinding values */
+        dst->blind = src->blind;
+    }
+}
+
+static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context_t *ctx) { /* Release the table and wipe the blinding values; ctx ends up unbuilt. */
+    free(ctx->prec); /* free(NULL) is a no-op, so clearing an unbuilt context is fine */
+    secp256k1_scalar_clear(&ctx->blind);
+    secp256k1_gej_clear(&ctx->initial);
+    ctx->prec = NULL;
+}
+
+static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context_t *ctx, secp256k1_gej_t *r, const secp256k1_scalar_t *gn) { /* r = gn*G, using the table and blinding values stored in ctx. */
+    secp256k1_ge_t add;
+    secp256k1_ge_storage_t adds;
+    secp256k1_scalar_t gnb;
+    int bits;
+    int i, j;
+    memset(&adds, 0, sizeof(adds));
+    *r = ctx->initial; /* the accumulator starts from the stored initial point */
+    /* Blind scalar/point multiplication by computing (n-b)G + bG instead of nG. */
+    secp256k1_scalar_add(&gnb, gn, &ctx->blind);
+    add.infinity = 0;
+    for (j = 0; j < 64; j++) {
+        bits = secp256k1_scalar_get_bits(&gnb, j * 4, 4); /* j-th 4-bit window of the blinded scalar */
+        for (i = 0; i < 16; i++) {
+            /** This uses a conditional move to avoid any secret data in array indexes.
+             *   _Any_ use of secret indexes has been demonstrated to result in timing
+             *   sidechannels, even when the cache-line access patterns are uniform.
+             *  See also:
+             *   "A word of warning", CHES 2013 Rump Session, by Daniel J. Bernstein and Peter Schwabe
+             *    (https://cryptojedi.org/peter/data/chesrump-20130822.pdf) and
+             *   "Cache Attacks and Countermeasures: the Case of AES", RSA 2006,
+             *    by Dag Arne Osvik, Adi Shamir, and Eran Tromer
+             *    (http://www.tau.ac.il/~tromer/papers/cache.pdf)
+             */
+            secp256k1_ge_storage_cmov(&adds, &(*ctx->prec)[j][i], i == bits);
+        }
+        secp256k1_ge_from_storage(&add, &adds);
+        secp256k1_gej_add_ge(r, r, &add);
+    }
+    bits = 0; /* NOTE(review): bits, add and gnb are wiped here, but adds still holds the last selected entry */
+    secp256k1_ge_clear(&add);
+    secp256k1_scalar_clear(&gnb);
+}
+
+/* Setup blinding values for secp256k1_ecmult_gen. seed32 is either NULL (reset first) or points to 32 bytes of seed. */
+static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context_t *ctx, const unsigned char *seed32) {
+    secp256k1_scalar_t b;
+    secp256k1_gej_t gb;
+    secp256k1_fe_t s;
+    unsigned char nonce32[32];
+    secp256k1_rfc6979_hmac_sha256_t rng;
+    int retry;
+    if (!seed32) {
+        /* When seed is NULL, reset the initial point and blinding value (initial = -G, blind = 1). */
+        secp256k1_gej_set_ge(&ctx->initial, &secp256k1_ge_const_g);
+        secp256k1_gej_neg(&ctx->initial, &ctx->initial);
+        secp256k1_scalar_set_int(&ctx->blind, 1);
+    }
+    /* The prior blinding value (if not reset) is chained forward by including it in the hash. */
+    secp256k1_scalar_get_b32(nonce32, &ctx->blind);
+    /** Using a CSPRNG allows a failure free interface, avoids needing large amounts of random data,
+     *   and guards against weak or adversarial seeds.  This is a simpler and safer interface than
+     *   asking the caller for blinding values directly and expecting them to retry on failure.
+     */
+    secp256k1_rfc6979_hmac_sha256_initialize(&rng, seed32 ? seed32 : nonce32, 32, nonce32, 32, NULL, 0);
+    /* Retry for out of range results to achieve uniformity. */
+    do {
+        secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32);
+        retry = !secp256k1_fe_set_b32(&s, nonce32);
+        retry |= secp256k1_fe_is_zero(&s);
+    } while (retry);
+    /* Randomize the projection to defend against multiplier sidechannels. */
+    secp256k1_gej_rescale(&ctx->initial, &s);
+    secp256k1_fe_clear(&s);
+    do {
+        secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32);
+        secp256k1_scalar_set_b32(&b, nonce32, &retry);
+        /* A blinding value of 0 works, but would undermine the projection hardening. */
+        retry |= secp256k1_scalar_is_zero(&b);
+    } while (retry);
+    secp256k1_rfc6979_hmac_sha256_finalize(&rng);
+    memset(nonce32, 0, 32);
+    secp256k1_ecmult_gen(ctx, &gb, &b); /* gb = b*G, computed with the initial/blind values still stored in ctx */
+    secp256k1_scalar_negate(&b, &b);
+    ctx->blind = b; /* stored blind is -b ... */
+    ctx->initial = gb; /* ... and the stored initial point is b*G */
+    secp256k1_scalar_clear(&b);
+    secp256k1_gej_clear(&gb);
+}
+
+#endif
diff --git a/secp256k1/ecmult_impl.h b/secp256k1/ecmult_impl.h
new file mode 100644
index 000000000..1b2856f83
--- /dev/null
+++ b/secp256k1/ecmult_impl.h
@@ -0,0 +1,317 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECMULT_IMPL_H_
+#define _SECP256K1_ECMULT_IMPL_H_
+
+#include "group.h"
+#include "scalar.h"
+#include "ecmult.h"
+
+/* optimal for 128-bit and 256-bit exponents. */
+#define WINDOW_A 5
+
+/** larger numbers may result in slightly better performance, at the cost of
+    exponentially larger precomputed tables. */
+#ifdef USE_ENDOMORPHISM
+/** Two tables for window size 15: 1.375 MiB. */
+#define WINDOW_G 15
+#else
+/** One table for window size 16: 1.375 MiB. */
+#define WINDOW_G 16
+#endif
+
+/** Fill a table 'pre' with precomputed odd multiples of a. w determines the size of the table.
+ *  pre will contain the values [1*a,3*a,5*a,...,(2^(w-1)-1)*a], so it needs place for
+ *  2^(w-2) entries.
+ *
+ *  There are two versions of this function:
+ *  - secp256k1_ecmult_table_precomp_gej_var, which stores group elements in jacobian notation,
+ *    fast to precompute, but slower to use in later additions.
+ *  - secp256k1_ecmult_table_precomp_ge_storage_var, which stores group elements in affine
+ *    notation, (much) slower to precompute, but a bit faster to use in later additions.
+ *  To compute a*P + b*G, we use the jacobian version for P, and the affine version for G, as
+ *  G is constant, so it only needs to be done once in advance.
+ */
+static void secp256k1_ecmult_table_precomp_gej_var(secp256k1_gej_t *pre, const secp256k1_gej_t *a, int w) {
+    secp256k1_gej_t d;
+    int i;
+    pre[0] = *a;
+    secp256k1_gej_double_var(&d, &pre[0]); /* d = 2*a, the step between consecutive odd multiples */
+    for (i = 1; i < (1 << (w-2)); i++) {
+        secp256k1_gej_add_var(&pre[i], &d, &pre[i-1]); /* pre[i] = pre[i-1] + 2*a */
+    }
+}
+
+static void secp256k1_ecmult_table_precomp_ge_storage_var(secp256k1_ge_storage_t *pre, const secp256k1_gej_t *a, int w) { /* Fill pre[0 .. 2^(w-2)-1] with the odd multiples of a, in storage form. */
+    secp256k1_gej_t d;
+    int i;
+    const int table_size = 1 << (w-2);
+    secp256k1_gej_t *prej = (secp256k1_gej_t *)checked_malloc(sizeof(secp256k1_gej_t) * table_size); /* temporary jacobian table */
+    secp256k1_ge_t *prea = (secp256k1_ge_t *)checked_malloc(sizeof(secp256k1_ge_t) * table_size); /* temporary secp256k1_ge_t table */
+    prej[0] = *a;
+    secp256k1_gej_double_var(&d, a); /* d = 2*a */
+    for (i = 1; i < table_size; i++) {
+        secp256k1_gej_add_var(&prej[i], &d, &prej[i-1]); /* prej[i] = prej[i-1] + 2*a */
+    }
+    secp256k1_ge_set_all_gej_var(table_size, prea, prej); /* convert the whole table in one call */
+    for (i = 0; i < table_size; i++) {
+        secp256k1_ge_to_storage(&pre[i], &prea[i]);
+    }
+    free(prej); /* both scratch tables are released before returning */
+    free(prea);
+}
+
+/** The number of entries a table with precomputed multiples needs to have: 2^(w-2). */
+#define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
+
+/** The following two macros retrieve a particular odd multiple n from a table of precomputed
+ *  multiples: entry (n-1)/2 for n > 0, or the negation of entry (-n-1)/2 otherwise. */
+#define ECMULT_TABLE_GET_GEJ(r,pre,n,w) do { \
+    VERIFY_CHECK(((n) & 1) == 1); \
+    VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
+    VERIFY_CHECK((n) <=  ((1 << ((w)-1)) - 1)); \
+    if ((n) > 0) { \
+        *(r) = (pre)[((n)-1)/2]; \
+    } else { \
+        secp256k1_gej_neg((r), &(pre)[(-(n)-1)/2]); \
+    } \
+} while(0)
+#define ECMULT_TABLE_GET_GE_STORAGE(r,pre,n,w) do { \
+    VERIFY_CHECK(((n) & 1) == 1); \
+    VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
+    VERIFY_CHECK((n) <=  ((1 << ((w)-1)) - 1)); \
+    if ((n) > 0) { \
+        secp256k1_ge_from_storage((r), &(pre)[((n)-1)/2]); \
+    } else { \
+        secp256k1_ge_from_storage((r), &(pre)[(-(n)-1)/2]); \
+        secp256k1_ge_neg((r), (r)); \
+    } \
+} while(0)
+
+static void secp256k1_ecmult_context_init(secp256k1_ecmult_context_t *ctx) { /* Put ctx into the "not built" state: every table pointer becomes NULL. */
+    ctx->pre_g = NULL; /* this is the pointer secp256k1_ecmult_context_is_built tests */
+#ifdef USE_ENDOMORPHISM
+    ctx->pre_g_128 = NULL;
+#endif
+}
+
+static void secp256k1_ecmult_context_build(secp256k1_ecmult_context_t *ctx) { /* Allocate and fill the generator table(s) used by secp256k1_ecmult. */
+    secp256k1_gej_t gj;
+    /* Building is idempotent: an already built context is left as it is. */
+    if (ctx->pre_g != NULL) {
+        return;
+    }
+
+    /* get the generator */
+    secp256k1_gej_set_ge(&gj, &secp256k1_ge_const_g);
+    /* One storage entry per odd multiple, ECMULT_TABLE_SIZE(WINDOW_G) entries in total. */
+    ctx->pre_g = (secp256k1_ge_storage_t (*)[])checked_malloc(sizeof((*ctx->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G));
+
+    /* precompute the tables with odd multiples */
+    secp256k1_ecmult_table_precomp_ge_storage_var(*ctx->pre_g, &gj, WINDOW_G);
+
+#ifdef USE_ENDOMORPHISM
+    {
+        secp256k1_gej_t g_128j;
+        int i;
+        /* Second table of the same size, for multiples of 2^128*G. */
+        ctx->pre_g_128 = (secp256k1_ge_storage_t (*)[])checked_malloc(sizeof((*ctx->pre_g_128)[0]) * ECMULT_TABLE_SIZE(WINDOW_G));
+
+        /* calculate 2^128*generator */
+        g_128j = gj;
+        for (i = 0; i < 128; i++) {
+            secp256k1_gej_double_var(&g_128j, &g_128j);
+        }
+        secp256k1_ecmult_table_precomp_ge_storage_var(*ctx->pre_g_128, &g_128j, WINDOW_G);
+    }
+#endif
+}
+
+static void secp256k1_ecmult_context_clone(secp256k1_ecmult_context_t *dst, /* Deep-copy the tables of src into dst; dst's pointers are overwritten without being freed. */
+                                           const secp256k1_ecmult_context_t *src) {
+    if (src->pre_g == NULL) {
+        dst->pre_g = NULL; /* unbuilt source yields an unbuilt clone */
+    } else {
+        size_t size = sizeof((*dst->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G); /* same size as allocated in secp256k1_ecmult_context_build */
+        dst->pre_g = (secp256k1_ge_storage_t (*)[])checked_malloc(size);
+        memcpy(dst->pre_g, src->pre_g, size);
+    }
+#ifdef USE_ENDOMORPHISM
+    if (src->pre_g_128 == NULL) {
+        dst->pre_g_128 = NULL;
+    } else {
+        size_t size = sizeof((*dst->pre_g_128)[0]) * ECMULT_TABLE_SIZE(WINDOW_G);
+        dst->pre_g_128 = (secp256k1_ge_storage_t (*)[])checked_malloc(size);
+        memcpy(dst->pre_g_128, src->pre_g_128, size);
+    }
+#endif
+}
+
+static int secp256k1_ecmult_context_is_built(const secp256k1_ecmult_context_t *ctx) { /* Returns non-zero iff the pre_g table has been allocated. */
+    return ctx->pre_g != NULL;
+}
+
+static void secp256k1_ecmult_context_clear(secp256k1_ecmult_context_t *ctx) { /* Release the table(s) and return ctx to the unbuilt state. */
+    free(ctx->pre_g); /* free(NULL) is a no-op, so clearing an unbuilt context is fine */
+#ifdef USE_ENDOMORPHISM
+    free(ctx->pre_g_128);
+#endif
+    secp256k1_ecmult_context_init(ctx); /* reset the pointers to NULL so they are not left dangling */
+}
+
+/** Convert a number to WNAF notation. The number becomes represented by sum(2^i * wnaf[i], i=0..bits),
+ *  with the following guarantees:
+ *  - each wnaf[i] is either 0, or an odd integer between -(1<<(w-1) - 1) and (1<<(w-1) - 1)
+ *  - two non-zero entries in wnaf are separated by at least w-1 zeroes.
+ *  - the number of set values in wnaf is returned. This number is at most 256, and at most one more
+ *    than the number of bits in the (absolute value) of the input.
+ */
+static int secp256k1_ecmult_wnaf(int *wnaf, const secp256k1_scalar_t *a, int w) {
+    secp256k1_scalar_t s = *a;
+    int set_bits = 0;
+    int bit = 0;
+    int sign = 1;
+    /* If bit 255 is set, work on the negated scalar and flip the sign of every emitted digit. */
+    if (secp256k1_scalar_get_bits(&s, 255, 1)) {
+        secp256k1_scalar_negate(&s, &s);
+        sign = -1;
+    }
+
+    while (bit < 256) {
+        int now;
+        int word;
+        if (secp256k1_scalar_get_bits(&s, bit, 1) == 0) {
+            bit++;
+            continue;
+        }
+        while (set_bits < bit) {
+            wnaf[set_bits++] = 0; /* zero-fill the positions skipped since the last digit */
+        }
+        now = w;
+        if (bit + now > 256) {
+            now = 256 - bit; /* clamp the window at the top of the scalar */
+        }
+        word = secp256k1_scalar_get_bits_var(&s, bit, now);
+        if (word & (1 << (w-1))) {
+            secp256k1_scalar_add_bit(&s, bit + w); /* carry: the digit below is emitted as word - 2^w */
+            wnaf[set_bits++] = sign * (word - (1 << w));
+        } else {
+            wnaf[set_bits++] = sign * word;
+        }
+        bit += now;
+    }
+    return set_bits;
+}
+
+static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_scalar_t *na, const secp256k1_scalar_t *ng) { /* r = na*a + ng*G, using the generator table(s) in ctx. */
+    secp256k1_gej_t tmpj;
+    secp256k1_gej_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
+    secp256k1_ge_t tmpa;
+#ifdef USE_ENDOMORPHISM
+    secp256k1_gej_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
+    secp256k1_scalar_t na_1, na_lam;
+    /* Split G factors. */
+    secp256k1_scalar_t ng_1, ng_128;
+    int wnaf_na_1[130];
+    int wnaf_na_lam[130];
+    int bits_na_1;
+    int bits_na_lam;
+    int wnaf_ng_1[129];
+    int bits_ng_1;
+    int wnaf_ng_128[129];
+    int bits_ng_128;
+#else
+    int wnaf_na[256];
+    int bits_na;
+    int wnaf_ng[257];
+    int bits_ng;
+#endif
+    int i;
+    int bits;
+
+#ifdef USE_ENDOMORPHISM
+    /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */
+    secp256k1_scalar_split_lambda_var(&na_1, &na_lam, na);
+
+    /* build wnaf representation for na_1 and na_lam. */
+    bits_na_1   = secp256k1_ecmult_wnaf(wnaf_na_1,   &na_1,   WINDOW_A);
+    bits_na_lam = secp256k1_ecmult_wnaf(wnaf_na_lam, &na_lam, WINDOW_A);
+    VERIFY_CHECK(bits_na_1 <= 130);
+    VERIFY_CHECK(bits_na_lam <= 130);
+    bits = bits_na_1;
+    if (bits_na_lam > bits) {
+        bits = bits_na_lam;
+    }
+#else
+    /* build wnaf representation for na. */
+    bits_na     = secp256k1_ecmult_wnaf(wnaf_na,     na,      WINDOW_A);
+    bits = bits_na;
+#endif
+
+    /* calculate odd multiples of a (a is read here, before r is first written, so r may alias a) */
+    secp256k1_ecmult_table_precomp_gej_var(pre_a, a, WINDOW_A);
+
+#ifdef USE_ENDOMORPHISM
+    for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
+        secp256k1_gej_mul_lambda(&pre_a_lam[i], &pre_a[i]);
+    }
+
+    /* split ng into ng_1 and ng_128 (where ng = ng_1 + ng_128*2^128, and ng_1 and ng_128 are ~128 bit) */
+    secp256k1_scalar_split_128(&ng_1, &ng_128, ng);
+
+    /* Build wnaf representation for ng_1 and ng_128 */
+    bits_ng_1   = secp256k1_ecmult_wnaf(wnaf_ng_1,   &ng_1,   WINDOW_G);
+    bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, &ng_128, WINDOW_G);
+    if (bits_ng_1 > bits) {
+        bits = bits_ng_1;
+    }
+    if (bits_ng_128 > bits) {
+        bits = bits_ng_128;
+    }
+#else
+    bits_ng     = secp256k1_ecmult_wnaf(wnaf_ng,     ng,      WINDOW_G);
+    if (bits_ng > bits) {
+        bits = bits_ng;
+    }
+#endif
+    /* bits is now the length of the longest wnaf; walk all of them together from the top digit down. */
+    secp256k1_gej_set_infinity(r);
+
+    for (i = bits-1; i >= 0; i--) {
+        int n;
+        secp256k1_gej_double_var(r, r); /* one shared doubling per digit position */
+#ifdef USE_ENDOMORPHISM
+        if (i < bits_na_1 && (n = wnaf_na_1[i])) {
+            ECMULT_TABLE_GET_GEJ(&tmpj, pre_a, n, WINDOW_A);
+            secp256k1_gej_add_var(r, r, &tmpj);
+        }
+        if (i < bits_na_lam && (n = wnaf_na_lam[i])) {
+            ECMULT_TABLE_GET_GEJ(&tmpj, pre_a_lam, n, WINDOW_A);
+            secp256k1_gej_add_var(r, r, &tmpj);
+        }
+        if (i < bits_ng_1 && (n = wnaf_ng_1[i])) {
+            ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
+            secp256k1_gej_add_ge_var(r, r, &tmpa);
+        }
+        if (i < bits_ng_128 && (n = wnaf_ng_128[i])) {
+            ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g_128, n, WINDOW_G);
+            secp256k1_gej_add_ge_var(r, r, &tmpa);
+        }
+#else
+        if (i < bits_na && (n = wnaf_na[i])) {
+            ECMULT_TABLE_GET_GEJ(&tmpj, pre_a, n, WINDOW_A);
+            secp256k1_gej_add_var(r, r, &tmpj);
+        }
+        if (i < bits_ng && (n = wnaf_ng[i])) {
+            ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
+            secp256k1_gej_add_ge_var(r, r, &tmpa);
+        }
+#endif
+    }
+}
+
+#endif
diff --git a/secp256k1/field.h b/secp256k1/field.h
index 7d187d02c..41b280892 100644
--- a/secp256k1/field.h
+++ b/secp256k1/field.h
@@ -1,6 +1,8 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
 
 #ifndef _SECP256K1_FIELD_
 #define _SECP256K1_FIELD_
@@ -16,9 +18,11 @@
  *    normality.
  */
 
-#if defined(USE_FIELD_GMP)
-#include "field_gmp.h"
-#elif defined(USE_FIELD_10X26)
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#if defined(USE_FIELD_10X26)
 #include "field_10x26.h"
 #elif defined(USE_FIELD_5X52)
 #include "field_5x52.h"
@@ -26,74 +30,90 @@
 #error "Please select field implementation"
 #endif
 
-typedef struct {
-    secp256k1_num_t p;
-} secp256k1_fe_consts_t;
+/** Normalize a field element. */
+static void secp256k1_fe_normalize(secp256k1_fe_t *r);
 
-static const secp256k1_fe_consts_t *secp256k1_fe_consts = NULL;
+/** Weakly normalize a field element: reduce its magnitude to 1, but don't fully normalize. */
+static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r);
 
-/** Initialize field element precomputation data. */
-void static secp256k1_fe_start(void);
+/** Normalize a field element, without constant-time guarantee. */
+static void secp256k1_fe_normalize_var(secp256k1_fe_t *r);
 
-/** Unload field element precomputation data. */
-void static secp256k1_fe_stop(void);
+/** Verify whether a field element represents zero i.e. would normalize to a zero value. The field
+ *  implementation may optionally normalize the input, but this should not be relied upon. */
+static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r);
 
-/** Normalize a field element. */
-void static secp256k1_fe_normalize(secp256k1_fe_t *r);
+/** Verify whether a field element represents zero i.e. would normalize to a zero value. The field
+ *  implementation may optionally normalize the input, but this should not be relied upon. */
+static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r);
 
 /** Set a field element equal to a small integer. Resulting field element is normalized. */
-void static secp256k1_fe_set_int(secp256k1_fe_t *r, int a);
+static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a);
 
 /** Verify whether a field element is zero. Requires the input to be normalized. */
-int  static secp256k1_fe_is_zero(const secp256k1_fe_t *a);
+static int secp256k1_fe_is_zero(const secp256k1_fe_t *a);
 
 /** Check the "oddness" of a field element. Requires the input to be normalized. */
-int  static secp256k1_fe_is_odd(const secp256k1_fe_t *a);
+static int secp256k1_fe_is_odd(const secp256k1_fe_t *a);
+
+/** Compare two field elements. Requires magnitude-1 inputs. */
+static int secp256k1_fe_equal_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b);
 
 /** Compare two field elements. Requires both inputs to be normalized */
-int  static secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b);
+static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b);
 
-/** Set a field element equal to 32-byte big endian value. Resulting field element is normalized. */
-void static secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a);
+/** Set a field element equal to 32-byte big endian value. If successful, the resulting field element is normalized. */
+static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a);
 
 /** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
-void static secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a);
+static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a);
 
 /** Set a field element equal to the additive inverse of another. Takes a maximum magnitude of the input
  *  as an argument. The magnitude of the output is one higher. */
-void static secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m);
+static void secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m);
 
 /** Multiplies the passed field element with a small integer constant. Multiplies the magnitude by that
  *  small integer. */
-void static secp256k1_fe_mul_int(secp256k1_fe_t *r, int a);
+static void secp256k1_fe_mul_int(secp256k1_fe_t *r, int a);
 
 /** Adds a field element to another. The result has the sum of the inputs' magnitudes as magnitude. */
-void static secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a);
+static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a);
 
 /** Sets a field element to be the product of two others. Requires the inputs' magnitudes to be at most 8.
  *  The output magnitude is 1 (but not guaranteed to be normalized). */
-void static secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b);
+static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b);
 
 /** Sets a field element to be the square of another. Requires the input's magnitude to be at most 8.
  *  The output magnitude is 1 (but not guaranteed to be normalized). */
-void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a);
+static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a);
 
-/** Sets a field element to be the (modular) square root of another. Requires the inputs' magnitude to
- *  be at most 8. The output magnitude is 1 (but not guaranteed to be normalized). */
-void static secp256k1_fe_sqrt(secp256k1_fe_t *r, const secp256k1_fe_t *a);
+/** Sets a field element to be the (modular) square root (if any exist) of another. Requires the
+ *  input's magnitude to be at most 8. The output magnitude is 1 (but not guaranteed to be
+ *  normalized). Return value indicates whether a square root was found. */
+static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a);
 
 /** Sets a field element to be the (modular) inverse of another. Requires the input's magnitude to be
  *  at most 8. The output magnitude is 1 (but not guaranteed to be normalized). */
-void static secp256k1_fe_inv(secp256k1_fe_t *r, const secp256k1_fe_t *a);
+static void secp256k1_fe_inv(secp256k1_fe_t *r, const secp256k1_fe_t *a);
 
 /** Potentially faster version of secp256k1_fe_inv, without constant-time guarantee. */
-void static secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a);
+static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a);
+
+/** Calculate the (modular) inverses of a batch of field elements. Requires the inputs' magnitudes to be
+ *  at most 8. The output magnitudes are 1 (but not guaranteed to be normalized). The inputs and
+ *  outputs must not overlap in memory. */
+static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t *r, const secp256k1_fe_t *a);
+
+/** Convert a field element to the storage type. */
+static void secp256k1_fe_to_storage(secp256k1_fe_storage_t *r, const secp256k1_fe_t*);
 
+/** Convert a field element back from the storage type. */
+static void secp256k1_fe_from_storage(secp256k1_fe_t *r, const secp256k1_fe_storage_t*);
 
-/** Convert a field element to a hexadecimal string. */
-void static secp256k1_fe_get_hex(char *r, int *rlen, const secp256k1_fe_t *a);
+/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */
+static void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag);
 
-/** Convert a hexadecimal string to a field element. */
-void static secp256k1_fe_set_hex(secp256k1_fe_t *r, const char *a, int alen);
+/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */
+static void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag);
 
 #endif
diff --git a/secp256k1/field_10x26.h b/secp256k1/field_10x26.h
index d544139e8..44bce6525 100644
--- a/secp256k1/field_10x26.h
+++ b/secp256k1/field_10x26.h
@@ -1,6 +1,8 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
 
 #ifndef _SECP256K1_FIELD_REPR_
 #define _SECP256K1_FIELD_REPR_
@@ -8,7 +10,7 @@
 #include <stdint.h>
 
 typedef struct {
-    // X = sum(i=0..9, elem[i]*2^26) mod n
+    /* X = sum(i=0..9, n[i]*2^(26*i)) mod p, where p = 2^256 - 0x1000003D1 */
     uint32_t n[10];
 #ifdef VERIFY
     int magnitude;
@@ -16,4 +18,30 @@ typedef struct {
 #endif
 } secp256k1_fe_t;
 
+/* Unpacks a constant, given as eight words d7..d0 (d0 least significant; 32 bits each judging by the shift amounts), into the ten limbs of an FE element; most limbs combine bits of two adjacent words. */
+#define SECP256K1_FE_CONST_INNER(d7, d6, d5, d4, d3, d2, d1, d0) { \
+    (d0) & 0x3FFFFFFUL, \
+    ((d0) >> 26) | ((d1) & 0xFFFFFUL) << 6, \
+    ((d1) >> 20) | ((d2) & 0x3FFFUL) << 12, \
+    ((d2) >> 14) | ((d3) & 0xFFUL) << 18, \
+    ((d3) >> 8) | ((d4) & 0x3) << 24, \
+    ((d4) >> 2) & 0x3FFFFFFUL, \
+    ((d4) >> 28) | ((d5) & 0x3FFFFFUL) << 4, \
+    ((d5) >> 22) | ((d6) & 0xFFFF) << 10, \
+    ((d6) >> 16) | ((d7) & 0x3FF) << 16, \
+    ((d7) >> 10) \
+}
+
+#ifdef VERIFY /* VERIFY builds append two 1s after the limbs for the extra debug members of secp256k1_fe_t (magnitude and, presumably, normalized) */
+#define SECP256K1_FE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {SECP256K1_FE_CONST_INNER((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0)), 1, 1}
+#else
+#define SECP256K1_FE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {SECP256K1_FE_CONST_INNER((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0))}
+#endif
+
+typedef struct {
+    uint32_t n[8]; /* eight 32-bit words; SECP256K1_FE_STORAGE_CONST places d0 in n[0] and d7 in n[7] */
+} secp256k1_fe_storage_t;
+
+#define SECP256K1_FE_STORAGE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }} /* arguments are listed d7 first but stored in reverse order, d0 first */
+
 #endif
diff --git a/secp256k1/field_10x26_impl.h b/secp256k1/field_10x26_impl.h
new file mode 100644
index 000000000..871b91f91
--- /dev/null
+++ b/secp256k1/field_10x26_impl.h
@@ -0,0 +1,1136 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_REPR_IMPL_H_
+#define _SECP256K1_FIELD_REPR_IMPL_H_
+
+#include <stdio.h>
+#include <string.h>
+#include "util.h"
+#include "num.h"
+#include "field.h"
+
+#ifdef VERIFY
+static void secp256k1_fe_verify(const secp256k1_fe_t *a) { /* VERIFY builds only: check that a's limbs respect the bounds implied by its magnitude and normalized fields */
+    const uint32_t *d = a->n;
+    int m = a->normalized ? 1 : 2 * a->magnitude, r = 1; /* m scales the per-limb bound; r accumulates the result of every check */
+    r &= (d[0] <= 0x3FFFFFFUL * m);
+    r &= (d[1] <= 0x3FFFFFFUL * m);
+    r &= (d[2] <= 0x3FFFFFFUL * m);
+    r &= (d[3] <= 0x3FFFFFFUL * m);
+    r &= (d[4] <= 0x3FFFFFFUL * m);
+    r &= (d[5] <= 0x3FFFFFFUL * m);
+    r &= (d[6] <= 0x3FFFFFFUL * m);
+    r &= (d[7] <= 0x3FFFFFFUL * m);
+    r &= (d[8] <= 0x3FFFFFFUL * m);
+    r &= (d[9] <= 0x03FFFFFUL * m); /* the top limb is bounded by a 22-bit mask instead of a 26-bit one */
+    r &= (a->magnitude >= 0);
+    r &= (a->magnitude <= 32);
+    if (a->normalized) {
+        r &= (a->magnitude <= 1);
+        if (r && (d[9] == 0x03FFFFFUL)) {
+            uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2];
+            if (mid == 0x3FFFFFFUL) {
+                r &= ((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL); /* same expression secp256k1_fe_normalize uses to detect a value >= the field characteristic; here it must not trigger */
+            }
+        }
+    }
+    VERIFY_CHECK(r == 1);
+}
+#else
+static void secp256k1_fe_verify(const secp256k1_fe_t *a) { /* no-op when VERIFY is not defined */
+    (void)a;
+}
+#endif
+
+static void secp256k1_fe_normalize(secp256k1_fe_t *r) { /* Fully reduce r in place; the final reduction pass is always executed, whatever the value of r */
+    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
+             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
+
+    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
+    uint32_t m; /* accumulates the AND of limbs t2..t8, used below to test whether they are all 0x3FFFFFF */
+    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
+
+    /* The first pass ensures the magnitude is 1, ... */
+    t0 += x * 0x3D1UL; t1 += (x << 6); /* adds x * (2^32 + 0x3D1) in place of the x * 2^256 just masked off t9 */
+    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
+    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
+    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
+    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
+    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
+    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
+    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
+    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
+    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
+
+    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t9 >> 23 == 0);
+
+    /* At most a single final reduction is needed; check if the value is >= the field characteristic */
+    x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
+        & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
+
+    /* Apply the final reduction (for constant-time behaviour, we do it always) */
+    t0 += x * 0x3D1UL; t1 += (x << 6); /* when x == 0 this pass adds nothing and leaves every limb unchanged */
+    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
+    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
+    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
+    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
+    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
+    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
+    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
+    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
+    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
+
+    /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
+    VERIFY_CHECK(t9 >> 22 == x);
+
+    /* Mask off the possible multiple of 2^256 from the final reduction */
+    t9 &= 0x03FFFFFUL;
+
+    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
+    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
+
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r) { /* Bring r to magnitude 1 in place with a single carry pass; no final reduction is done and the normalized field is not written */
+    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
+             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
+
+    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
+    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
+
+    /* The first pass ensures the magnitude is 1, ... */
+    t0 += x * 0x3D1UL; t1 += (x << 6); /* adds x * (2^32 + 0x3D1) in place of the x * 2^256 just masked off t9 */
+    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
+    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
+    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
+    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
+    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
+    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
+    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
+    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
+    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
+
+    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element); that bit is kept in t9 here, not folded back */
+    VERIFY_CHECK(t9 >> 23 == 0);
+
+    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
+    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
+
+#ifdef VERIFY
+    r->magnitude = 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) { /* Same steps as secp256k1_fe_normalize, except that the final reduction pass is skipped (by branching on x) when it is not needed */
+    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
+             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
+
+    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
+    uint32_t m; /* accumulates the AND of limbs t2..t8, used below to test whether they are all 0x3FFFFFF */
+    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
+
+    /* The first pass ensures the magnitude is 1, ... */
+    t0 += x * 0x3D1UL; t1 += (x << 6); /* adds x * (2^32 + 0x3D1) in place of the x * 2^256 just masked off t9 */
+    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
+    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
+    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
+    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
+    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
+    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
+    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
+    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
+    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
+
+    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t9 >> 23 == 0);
+
+    /* At most a single final reduction is needed; check if the value is >= the field characteristic */
+    x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
+        & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
+
+    if (x) {
+        t0 += 0x3D1UL; t1 += (x << 6); /* x is 1 on this path (t9 >> 23 == 0 was checked above), so this equals x * 0x3D1UL */
+        t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
+        t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
+        t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
+        t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
+        t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
+        t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
+        t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
+        t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
+        t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
+
+        /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
+        VERIFY_CHECK(t9 >> 22 == x);
+
+        /* Mask off the possible multiple of 2^256 from the final reduction */
+        t9 &= 0x03FFFFFUL;
+    }
+
+    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
+    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
+
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) { /* Return 1 if r, after the first reduction pass, is the raw value 0 or P, and 0 otherwise; r itself is never written */
+    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
+             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
+
+    /* z0 tracks a possible raw value of 0 (OR of all limbs), z1 tracks a possible raw value of P (AND of all limbs after XOR with a per-limb constant) */
+    uint32_t z0, z1;
+
+    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
+    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
+
+    /* The first pass ensures the magnitude is 1, ...; the XOR constants 0x3D0, 0x40 and 0x3C00000 map the matching limbs of P to 0x3FFFFFF */
+    t0 += x * 0x3D1UL; t1 += (x << 6);
+    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0  = t0; z1  = t0 ^ 0x3D0UL;
+    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
+    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
+    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
+    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
+    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
+    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
+    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
+    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
+                                         z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
+
+    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t9 >> 23 == 0);
+
+    return (z0 == 0) | (z1 == 0x3FFFFFFUL);
+}
+
+static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r) { /* Same test as secp256k1_fe_normalizes_to_zero, with an early return of 0 as soon as limb 0 rules out both candidates; r is never written */
+    uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
+    uint32_t z0, z1;
+    uint32_t x;
+
+    t0 = r->n[0];
+    t9 = r->n[9];
+
+    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
+    x = t9 >> 22;
+
+    /* The first pass ensures the magnitude is 1, ... */
+    t0 += x * 0x3D1UL;
+
+    /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
+    z0 = t0 & 0x3FFFFFFUL;
+    z1 = z0 ^ 0x3D0UL;
+
+    /* Fast return path should catch the majority of cases: limb 0 matches neither 0 nor the low limb of P, so limbs 1..8 are never loaded */
+    if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) {
+        return 0;
+    }
+
+    t1 = r->n[1];
+    t2 = r->n[2];
+    t3 = r->n[3];
+    t4 = r->n[4];
+    t5 = r->n[5];
+    t6 = r->n[6];
+    t7 = r->n[7];
+    t8 = r->n[8];
+
+    t9 &= 0x03FFFFFUL;
+    t1 += (x << 6);
+
+    t1 += (t0 >> 26); t0  = z0; /* z0 already holds the masked limb 0, so it is reused instead of masking t0 again */
+    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
+    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
+    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
+    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
+    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
+    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
+    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
+    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
+                                         z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
+
+    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t9 >> 23 == 0);
+
+    return (z0 == 0) | (z1 == 0x3FFFFFFUL);
+}
+
+SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) { /* Set r to the integer a: limb 0 receives a unreduced, limbs 1..9 are zeroed */
+    r->n[0] = a;
+    r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 1;
+    secp256k1_fe_verify(r); /* with normalized set, this check requires limb 0 (i.e. a) to be at most 0x3FFFFFF */
+#endif
+}
+
+SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) { /* Return 1 if all ten limbs of a are zero, else 0; VERIFY builds require a to be normalized */
+    const uint32_t *t = a->n;
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized);
+    secp256k1_fe_verify(a);
+#endif
+    return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0;
+}
+
+SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe_t *a) { /* Return the lowest bit of limb 0 (1 if odd, 0 if even); VERIFY builds require a to be normalized */
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized);
+    secp256k1_fe_verify(a);
+#endif
+    return a->n[0] & 1;
+}
+
+SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) { /* Overwrite all ten limbs of a with zero */
+    int i;
+#ifdef VERIFY
+    a->magnitude = 0; /* VERIFY builds record the cleared element as magnitude 0 and normalized */
+    a->normalized = 1;
+#endif
+    for (i=0; i<10; i++) {
+        a->n[i] = 0;
+    }
+}
+
+static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { /* Compare a and b limb by limb; returns 1 if a > b, -1 if a < b, 0 if all limbs are equal */
+    int i;
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized);
+    VERIFY_CHECK(b->normalized);
+    secp256k1_fe_verify(a);
+    secp256k1_fe_verify(b);
+#endif
+    for (i = 9; i >= 0; i--) { /* start at the most significant limb and stop at the first difference, so the running time depends on the inputs */
+        if (a->n[i] > b->n[i]) {
+            return 1;
+        }
+        if (a->n[i] < b->n[i]) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) { /* Load the 32 bytes at a (a[0] most significant) into the limbs of r; returns 1 on success, 0 if the value is >= the field characteristic */
+    int i;
+    r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
+    r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
+    for (i=0; i<32; i++) { /* i counts bytes from the least significant one, a[31] */
+        int j;
+        for (j=0; j<4; j++) { /* copy two bits at a time so that no piece straddles a 26-bit limb boundary */
+            int limb = (8*i+2*j)/26;
+            int shift = (8*i+2*j)%26;
+            r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift;
+        }
+    }
+    if (r->n[9] == 0x3FFFFFUL && (r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL && (r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL) {
+        return 0; /* the limbs have already been written at this point; the VERIFY fields below are not updated on this path */
+    }
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 1;
+    secp256k1_fe_verify(r);
+#endif
+    return 1;
+}
+
+/** Convert a field element to a 32-byte big endian value (r[0] receives the most significant byte). Requires the input to be normalized; each byte is assembled two bits at a time from the 26-bit limbs. */
+static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
+    int i;
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized);
+    secp256k1_fe_verify(a);
+#endif
+    for (i=0; i<32; i++) {
+        int j;
+        int c = 0;
+        for (j=0; j<4; j++) {
+            int limb = (8*i+2*j)/26;
+            int shift = (8*i+2*j)%26;
+            c |= ((a->n[limb] >> shift) & 0x3) << (2 * j);
+        }
+        r[31-i] = c;
+    }
+}
+
+SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m) { /* Set r to 2*(m+1)*P - a, limb by limb with no carries; VERIFY builds require a's magnitude to be at most m and give r magnitude m+1 */
+#ifdef VERIFY
+    VERIFY_CHECK(a->magnitude <= m);
+    secp256k1_fe_verify(a);
+#endif
+    r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0]; /* 0x3FFFC2F is 2^26 - 0x3D1, the low limb of P */
+    r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1]; /* 0x3FFFFBF is 0x3FFFFFF - 0x40, the second limb of P */
+    r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2];
+    r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3];
+    r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4];
+    r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5];
+    r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6];
+    r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7];
+    r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8];
+    r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9];
+#ifdef VERIFY
+    r->magnitude = m + 1;
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+SECP256K1_INLINE static void secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) { /* Multiply every limb of r by a in place; no carry propagation or reduction is performed */
+    r->n[0] *= a;
+    r->n[1] *= a;
+    r->n[2] *= a;
+    r->n[3] *= a;
+    r->n[4] *= a;
+    r->n[5] *= a;
+    r->n[6] *= a;
+    r->n[7] *= a;
+    r->n[8] *= a;
+    r->n[9] *= a;
+#ifdef VERIFY
+    r->magnitude *= a; /* VERIFY builds scale the tracked magnitude by the same factor and clear the normalized flag */
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) { /* Add a to r limb by limb in place; no carry propagation or reduction is performed */
+#ifdef VERIFY
+    secp256k1_fe_verify(a);
+#endif
+    r->n[0] += a->n[0];
+    r->n[1] += a->n[1];
+    r->n[2] += a->n[2];
+    r->n[3] += a->n[3];
+    r->n[4] += a->n[4];
+    r->n[5] += a->n[5];
+    r->n[6] += a->n[6];
+    r->n[7] += a->n[7];
+    r->n[8] += a->n[8];
+    r->n[9] += a->n[9];
+#ifdef VERIFY
+    r->magnitude += a->magnitude; /* VERIFY builds add the tracked magnitudes and clear the normalized flag */
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+#ifdef VERIFY
+#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0) /* checks that x has no bits set at position n or above, i.e. x fits in n bits */
+#else
+#define VERIFY_BITS(x, n) do { } while(0) /* expands to an empty statement when VERIFY is not defined */
+#endif
+
+SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
+    uint64_t c, d;
+    uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
+    uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
+    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
+
+    VERIFY_BITS(a[0], 30);
+    VERIFY_BITS(a[1], 30);
+    VERIFY_BITS(a[2], 30);
+    VERIFY_BITS(a[3], 30);
+    VERIFY_BITS(a[4], 30);
+    VERIFY_BITS(a[5], 30);
+    VERIFY_BITS(a[6], 30);
+    VERIFY_BITS(a[7], 30);
+    VERIFY_BITS(a[8], 30);
+    VERIFY_BITS(a[9], 26);
+    VERIFY_BITS(b[0], 30);
+    VERIFY_BITS(b[1], 30);
+    VERIFY_BITS(b[2], 30);
+    VERIFY_BITS(b[3], 30);
+    VERIFY_BITS(b[4], 30);
+    VERIFY_BITS(b[5], 30);
+    VERIFY_BITS(b[6], 30);
+    VERIFY_BITS(b[7], 30);
+    VERIFY_BITS(b[8], 30);
+    VERIFY_BITS(b[9], 26);
+
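+    /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod p (the field prime, not the limb array n).
+     *  px is a shorthand for sum(a[i]*b[x-i], i=0..x), where terms with a limb index above 9 are omitted.
+     *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0], i.e. x<<260 is congruent to (x*R1)<<26 + x*R0.
+     */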
+
+    d  = (uint64_t)a[0] * b[9]
+       + (uint64_t)a[1] * b[8]
+       + (uint64_t)a[2] * b[7]
+       + (uint64_t)a[3] * b[6]
+       + (uint64_t)a[4] * b[5]
+       + (uint64_t)a[5] * b[4]
+       + (uint64_t)a[6] * b[3]
+       + (uint64_t)a[7] * b[2]
+       + (uint64_t)a[8] * b[1]
+       + (uint64_t)a[9] * b[0];
+    /* VERIFY_BITS(d, 64); */
+    /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
+    t9 = d & M; d >>= 26;
+    VERIFY_BITS(t9, 26);
+    VERIFY_BITS(d, 38);
+    /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
+
+    c  = (uint64_t)a[0] * b[0];
+    VERIFY_BITS(c, 60);
+    /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
+    d += (uint64_t)a[1] * b[9]
+       + (uint64_t)a[2] * b[8]
+       + (uint64_t)a[3] * b[7]
+       + (uint64_t)a[4] * b[6]
+       + (uint64_t)a[5] * b[5]
+       + (uint64_t)a[6] * b[4]
+       + (uint64_t)a[7] * b[3]
+       + (uint64_t)a[8] * b[2]
+       + (uint64_t)a[9] * b[1];
+    VERIFY_BITS(d, 63);
+    /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+    u0 = d & M; d >>= 26; c += u0 * R0;
+    VERIFY_BITS(u0, 26);
+    VERIFY_BITS(d, 37);
+    VERIFY_BITS(c, 61);
+    /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+    t0 = c & M; c >>= 26; c += u0 * R1;
+    VERIFY_BITS(t0, 26);
+    VERIFY_BITS(c, 37);
+    /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+
+    c += (uint64_t)a[0] * b[1]
+       + (uint64_t)a[1] * b[0];
+    VERIFY_BITS(c, 62);
+    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    d += (uint64_t)a[2] * b[9]
+       + (uint64_t)a[3] * b[8]
+       + (uint64_t)a[4] * b[7]
+       + (uint64_t)a[5] * b[6]
+       + (uint64_t)a[6] * b[5]
+       + (uint64_t)a[7] * b[4]
+       + (uint64_t)a[8] * b[3]
+       + (uint64_t)a[9] * b[2];
+    VERIFY_BITS(d, 63);
+    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    u1 = d & M; d >>= 26; c += u1 * R0;
+    VERIFY_BITS(u1, 26);
+    VERIFY_BITS(d, 37);
+    VERIFY_BITS(c, 63);
+    /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    t1 = c & M; c >>= 26; c += u1 * R1;
+    VERIFY_BITS(t1, 26);
+    VERIFY_BITS(c, 38);
+    /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+
+    c += (uint64_t)a[0] * b[2]
+       + (uint64_t)a[1] * b[1]
+       + (uint64_t)a[2] * b[0];
+    VERIFY_BITS(c, 62);
+    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    d += (uint64_t)a[3] * b[9]
+       + (uint64_t)a[4] * b[8]
+       + (uint64_t)a[5] * b[7]
+       + (uint64_t)a[6] * b[6]
+       + (uint64_t)a[7] * b[5]
+       + (uint64_t)a[8] * b[4]
+       + (uint64_t)a[9] * b[3];
+    VERIFY_BITS(d, 63);
+    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    u2 = d & M; d >>= 26; c += u2 * R0;
+    VERIFY_BITS(u2, 26);
+    VERIFY_BITS(d, 37);
+    VERIFY_BITS(c, 63);
+    /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    t2 = c & M; c >>= 26; c += u2 * R1;
+    VERIFY_BITS(t2, 26);
+    VERIFY_BITS(c, 38);
+    /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+
+    c += (uint64_t)a[0] * b[3]
+       + (uint64_t)a[1] * b[2]
+       + (uint64_t)a[2] * b[1]
+       + (uint64_t)a[3] * b[0];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    d += (uint64_t)a[4] * b[9]
+       + (uint64_t)a[5] * b[8]
+       + (uint64_t)a[6] * b[7]
+       + (uint64_t)a[7] * b[6]
+       + (uint64_t)a[8] * b[5]
+       + (uint64_t)a[9] * b[4];
+    VERIFY_BITS(d, 63);
+    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    u3 = d & M; d >>= 26; c += u3 * R0;
+    VERIFY_BITS(u3, 26);
+    VERIFY_BITS(d, 37);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    t3 = c & M; c >>= 26; c += u3 * R1;
+    VERIFY_BITS(t3, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+
+    c += (uint64_t)a[0] * b[4]
+       + (uint64_t)a[1] * b[3]
+       + (uint64_t)a[2] * b[2]
+       + (uint64_t)a[3] * b[1]
+       + (uint64_t)a[4] * b[0];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    d += (uint64_t)a[5] * b[9]
+       + (uint64_t)a[6] * b[8]
+       + (uint64_t)a[7] * b[7]
+       + (uint64_t)a[8] * b[6]
+       + (uint64_t)a[9] * b[5];
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    u4 = d & M; d >>= 26; c += u4 * R0;
+    VERIFY_BITS(u4, 26);
+    VERIFY_BITS(d, 36);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    t4 = c & M; c >>= 26; c += u4 * R1;
+    VERIFY_BITS(t4, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)a[0] * b[5]
+       + (uint64_t)a[1] * b[4]
+       + (uint64_t)a[2] * b[3]
+       + (uint64_t)a[3] * b[2]
+       + (uint64_t)a[4] * b[1]
+       + (uint64_t)a[5] * b[0];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)a[6] * b[9]
+       + (uint64_t)a[7] * b[8]
+       + (uint64_t)a[8] * b[7]
+       + (uint64_t)a[9] * b[6];
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    u5 = d & M; d >>= 26; c += u5 * R0;
+    VERIFY_BITS(u5, 26);
+    VERIFY_BITS(d, 36);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    t5 = c & M; c >>= 26; c += u5 * R1;
+    VERIFY_BITS(t5, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)a[0] * b[6]
+       + (uint64_t)a[1] * b[5]
+       + (uint64_t)a[2] * b[4]
+       + (uint64_t)a[3] * b[3]
+       + (uint64_t)a[4] * b[2]
+       + (uint64_t)a[5] * b[1]
+       + (uint64_t)a[6] * b[0];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)a[7] * b[9]
+       + (uint64_t)a[8] * b[8]
+       + (uint64_t)a[9] * b[7];
+    VERIFY_BITS(d, 61);
+    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    u6 = d & M; d >>= 26; c += u6 * R0;
+    VERIFY_BITS(u6, 26);
+    VERIFY_BITS(d, 35);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    t6 = c & M; c >>= 26; c += u6 * R1;
+    VERIFY_BITS(t6, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)a[0] * b[7]
+       + (uint64_t)a[1] * b[6]
+       + (uint64_t)a[2] * b[5]
+       + (uint64_t)a[3] * b[4]
+       + (uint64_t)a[4] * b[3]
+       + (uint64_t)a[5] * b[2]
+       + (uint64_t)a[6] * b[1]
+       + (uint64_t)a[7] * b[0];
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x8000007C00000007ULL);
+    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)a[8] * b[9]
+       + (uint64_t)a[9] * b[8];
+    VERIFY_BITS(d, 58);
+    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    u7 = d & M; d >>= 26; c += u7 * R0;
+    VERIFY_BITS(u7, 26);
+    VERIFY_BITS(d, 32);
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
+    /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    t7 = c & M; c >>= 26; c += u7 * R1;
+    VERIFY_BITS(t7, 26);
+    VERIFY_BITS(c, 38);
+    /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)a[0] * b[8]
+       + (uint64_t)a[1] * b[7]
+       + (uint64_t)a[2] * b[6]
+       + (uint64_t)a[3] * b[5]
+       + (uint64_t)a[4] * b[4]
+       + (uint64_t)a[5] * b[3]
+       + (uint64_t)a[6] * b[2]
+       + (uint64_t)a[7] * b[1]
+       + (uint64_t)a[8] * b[0];
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x9000007B80000008ULL);
+    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)a[9] * b[9];
+    VERIFY_BITS(d, 57);
+    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    u8 = d & M; d >>= 26; c += u8 * R0;
+    VERIFY_BITS(u8, 26);
+    VERIFY_BITS(d, 31);
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    r[3] = t3;
+    VERIFY_BITS(r[3], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[4] = t4;
+    VERIFY_BITS(r[4], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[5] = t5;
+    VERIFY_BITS(r[5], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[6] = t6;
+    VERIFY_BITS(r[6], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[7] = t7;
+    VERIFY_BITS(r[7], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    r[8] = c & M; c >>= 26; c += u8 * R1;
+    VERIFY_BITS(r[8], 26);
+    VERIFY_BITS(c, 39);
+    /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c   += d * R0 + t9;
+    VERIFY_BITS(c, 45);
+    /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
+    VERIFY_BITS(r[9], 22);
+    VERIFY_BITS(c, 46);
+    /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    d    = c * (R0 >> 4) + t0;
+    VERIFY_BITS(d, 56);
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[0] = d & M; d >>= 26;
+    VERIFY_BITS(r[0], 26);
+    VERIFY_BITS(d, 30);
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d   += c * (R1 >> 4) + t1;
+    VERIFY_BITS(d, 53);
+    VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[1] = d & M; d >>= 26;
+    VERIFY_BITS(r[1], 26);
+    VERIFY_BITS(d, 27);
+    VERIFY_CHECK(d <= 0x4000000ULL);
+    /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d   += t2;
+    VERIFY_BITS(d, 27);
+    /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[2] = d;
+    VERIFY_BITS(r[2], 27);
+    /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+}
+
+SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) {
+    uint64_t c, d;
+    uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
+    uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7;
+    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL; /* M masks one 26-bit limb; R0 and R1 are the fold-back constants from the note below */
+
+    VERIFY_BITS(a[0], 30);
+    VERIFY_BITS(a[1], 30);
+    VERIFY_BITS(a[2], 30);
+    VERIFY_BITS(a[3], 30);
+    VERIFY_BITS(a[4], 30);
+    VERIFY_BITS(a[5], 30);
+    VERIFY_BITS(a[6], 30);
+    VERIFY_BITS(a[7], 30);
+    VERIFY_BITS(a[8], 30);
+    VERIFY_BITS(a[9], 26);
+
+    /** Squares the ten limbs of a into r; each step below is followed by the invariant it maintains. [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
+     *  px is a shorthand for sum(a[i]*a[x-i], i=0..x); symmetric pairs of terms are computed once as (a[i]*2) * a[x-i].
+     *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]; this is how the low 26 bits of the high accumulator d (u0..u8) are folded into c.
+     *  (a[i]*2) is evaluated in 32 bits before the widening cast; the VERIFY_BITS(a[i], 30) checks above presumably bound a[0..8] below 2^30 so it cannot wrap. */
+
+    d  = (uint64_t)(a[0]*2) * a[9]
+       + (uint64_t)(a[1]*2) * a[8]
+       + (uint64_t)(a[2]*2) * a[7]
+       + (uint64_t)(a[3]*2) * a[6]
+       + (uint64_t)(a[4]*2) * a[5];
+    /* VERIFY_BITS(d, 64); */
+    /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
+    t9 = d & M; d >>= 26;
+    VERIFY_BITS(t9, 26);
+    VERIFY_BITS(d, 38);
+    /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
+
+    c  = (uint64_t)a[0] * a[0];
+    VERIFY_BITS(c, 60);
+    /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
+    d += (uint64_t)(a[1]*2) * a[9]
+       + (uint64_t)(a[2]*2) * a[8]
+       + (uint64_t)(a[3]*2) * a[7]
+       + (uint64_t)(a[4]*2) * a[6]
+       + (uint64_t)a[5] * a[5];
+    VERIFY_BITS(d, 63);
+    /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+    u0 = d & M; d >>= 26; c += u0 * R0;
+    VERIFY_BITS(u0, 26);
+    VERIFY_BITS(d, 37);
+    VERIFY_BITS(c, 61);
+    /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+    t0 = c & M; c >>= 26; c += u0 * R1;
+    VERIFY_BITS(t0, 26);
+    VERIFY_BITS(c, 37);
+    /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[1];
+    VERIFY_BITS(c, 62);
+    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    d += (uint64_t)(a[2]*2) * a[9]
+       + (uint64_t)(a[3]*2) * a[8]
+       + (uint64_t)(a[4]*2) * a[7]
+       + (uint64_t)(a[5]*2) * a[6];
+    VERIFY_BITS(d, 63);
+    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    u1 = d & M; d >>= 26; c += u1 * R0;
+    VERIFY_BITS(u1, 26);
+    VERIFY_BITS(d, 37);
+    VERIFY_BITS(c, 63);
+    /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    t1 = c & M; c >>= 26; c += u1 * R1;
+    VERIFY_BITS(t1, 26);
+    VERIFY_BITS(c, 38);
+    /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[2]
+       + (uint64_t)a[1] * a[1];
+    VERIFY_BITS(c, 62);
+    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    d += (uint64_t)(a[3]*2) * a[9]
+       + (uint64_t)(a[4]*2) * a[8]
+       + (uint64_t)(a[5]*2) * a[7]
+       + (uint64_t)a[6] * a[6];
+    VERIFY_BITS(d, 63);
+    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    u2 = d & M; d >>= 26; c += u2 * R0;
+    VERIFY_BITS(u2, 26);
+    VERIFY_BITS(d, 37);
+    VERIFY_BITS(c, 63);
+    /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    t2 = c & M; c >>= 26; c += u2 * R1;
+    VERIFY_BITS(t2, 26);
+    VERIFY_BITS(c, 38);
+    /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[3]
+       + (uint64_t)(a[1]*2) * a[2];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    d += (uint64_t)(a[4]*2) * a[9]
+       + (uint64_t)(a[5]*2) * a[8]
+       + (uint64_t)(a[6]*2) * a[7];
+    VERIFY_BITS(d, 63);
+    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    u3 = d & M; d >>= 26; c += u3 * R0;
+    VERIFY_BITS(u3, 26);
+    VERIFY_BITS(d, 37);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    t3 = c & M; c >>= 26; c += u3 * R1;
+    VERIFY_BITS(t3, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[4]
+       + (uint64_t)(a[1]*2) * a[3]
+       + (uint64_t)a[2] * a[2];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    d += (uint64_t)(a[5]*2) * a[9]
+       + (uint64_t)(a[6]*2) * a[8]
+       + (uint64_t)a[7] * a[7];
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    u4 = d & M; d >>= 26; c += u4 * R0;
+    VERIFY_BITS(u4, 26);
+    VERIFY_BITS(d, 36);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    t4 = c & M; c >>= 26; c += u4 * R1;
+    VERIFY_BITS(t4, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[5]
+       + (uint64_t)(a[1]*2) * a[4]
+       + (uint64_t)(a[2]*2) * a[3];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)(a[6]*2) * a[9]
+       + (uint64_t)(a[7]*2) * a[8];
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    u5 = d & M; d >>= 26; c += u5 * R0;
+    VERIFY_BITS(u5, 26);
+    VERIFY_BITS(d, 36);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    t5 = c & M; c >>= 26; c += u5 * R1;
+    VERIFY_BITS(t5, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[6]
+       + (uint64_t)(a[1]*2) * a[5]
+       + (uint64_t)(a[2]*2) * a[4]
+       + (uint64_t)a[3] * a[3];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)(a[7]*2) * a[9]
+       + (uint64_t)a[8] * a[8];
+    VERIFY_BITS(d, 61);
+    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    u6 = d & M; d >>= 26; c += u6 * R0;
+    VERIFY_BITS(u6, 26);
+    VERIFY_BITS(d, 35);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    t6 = c & M; c >>= 26; c += u6 * R1;
+    VERIFY_BITS(t6, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[7]
+       + (uint64_t)(a[1]*2) * a[6]
+       + (uint64_t)(a[2]*2) * a[5]
+       + (uint64_t)(a[3]*2) * a[4];
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x8000007C00000007ULL);
+    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)(a[8]*2) * a[9];
+    VERIFY_BITS(d, 58);
+    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    u7 = d & M; d >>= 26; c += u7 * R0;
+    VERIFY_BITS(u7, 26);
+    VERIFY_BITS(d, 32);
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
+    /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    t7 = c & M; c >>= 26; c += u7 * R1;
+    VERIFY_BITS(t7, 26);
+    VERIFY_BITS(c, 38);
+    /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[8]
+       + (uint64_t)(a[1]*2) * a[7]
+       + (uint64_t)(a[2]*2) * a[6]
+       + (uint64_t)(a[3]*2) * a[5]
+       + (uint64_t)a[4] * a[4];
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x9000007B80000008ULL);
+    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)a[9] * a[9];
+    VERIFY_BITS(d, 57);
+    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    u8 = d & M; d >>= 26; c += u8 * R0;
+    VERIFY_BITS(u8, 26);
+    VERIFY_BITS(d, 31);
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    r[3] = t3;
+    VERIFY_BITS(r[3], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[4] = t4;
+    VERIFY_BITS(r[4], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[5] = t5;
+    VERIFY_BITS(r[5], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[6] = t6;
+    VERIFY_BITS(r[6], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[7] = t7;
+    VERIFY_BITS(r[7], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    r[8] = c & M; c >>= 26; c += u8 * R1;
+    VERIFY_BITS(r[8], 26);
+    VERIFY_BITS(c, 39);
+    /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c   += d * R0 + t9;
+    VERIFY_BITS(c, 45);
+    /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
+    VERIFY_BITS(r[9], 22);
+    VERIFY_BITS(c, 46);
+    /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    d    = c * (R0 >> 4) + t0;
+    VERIFY_BITS(d, 56);
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[0] = d & M; d >>= 26;
+    VERIFY_BITS(r[0], 26);
+    VERIFY_BITS(d, 30);
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d   += c * (R1 >> 4) + t1;
+    VERIFY_BITS(d, 53);
+    VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[1] = d & M; d >>= 26;
+    VERIFY_BITS(r[1], 26);
+    VERIFY_BITS(d, 27);
+    VERIFY_CHECK(d <= 0x4000000ULL);
+    /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d   += t2;
+    VERIFY_BITS(d, 27);
+    /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[2] = d;
+    VERIFY_BITS(r[2], 27);
+    /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+}
+
+
+static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b) { /* r = a * b, computed by secp256k1_fe_mul_inner on the raw limb arrays */
+#ifdef VERIFY
+    VERIFY_CHECK(a->magnitude <= 8); /* VERIFY builds require both inputs to have magnitude at most 8 */
+    VERIFY_CHECK(b->magnitude <= 8);
+    secp256k1_fe_verify(a);
+    secp256k1_fe_verify(b);
+    VERIFY_CHECK(r != b); /* b is SECP256K1_RESTRICT-qualified, so the output must not alias it; r == a is not rejected here */
+#endif
+    secp256k1_fe_mul_inner(r->n, a->n, b->n);
+#ifdef VERIFY
+    r->magnitude = 1; /* the product is tagged as magnitude 1 and not normalized */
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) { /* r = a * a, computed by secp256k1_fe_sqr_inner on the raw limb arrays */
+#ifdef VERIFY
+    VERIFY_CHECK(a->magnitude <= 8); /* VERIFY builds require the input to have magnitude at most 8 */
+    secp256k1_fe_verify(a);
+#endif
+    secp256k1_fe_sqr_inner(r->n, a->n);
+#ifdef VERIFY
+    r->magnitude = 1; /* the square is tagged as magnitude 1 and not normalized */
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag) { /* branch-free select: flag == 1 copies a into r, flag == 0 leaves r unchanged */
+    uint32_t mask0, mask1;
+    mask0 = flag + ~((uint32_t)0); /* flag == 0 -> 0xFFFFFFFF; flag == 1 -> wraps to 0; any other flag value yields a mixed mask */
+    mask1 = ~mask0; /* complement of mask0: all ones exactly when a is to be taken */
+    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
+    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
+    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
+    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
+    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
+    r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
+    r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
+    r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
+    r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
+    r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
+#ifdef VERIFY
+    r->magnitude = (r->magnitude & mask0) | (a->magnitude & mask1); /* the VERIFY bookkeeping fields follow the same selection as the limbs */
+    r->normalized = (r->normalized & mask0) | (a->normalized & mask1);
+#endif
+}
+
+static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag) { /* branch-free select on the eight storage words: flag == 1 copies a into r, flag == 0 leaves r unchanged */
+    uint32_t mask0, mask1;
+    mask0 = flag + ~((uint32_t)0); /* flag == 0 -> 0xFFFFFFFF; flag == 1 -> wraps to 0; any other flag value yields a mixed mask */
+    mask1 = ~mask0; /* complement of mask0: all ones exactly when a is to be taken */
+    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
+    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
+    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
+    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
+    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
+    r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
+    r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
+    r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
+}
+
+static void secp256k1_fe_to_storage(secp256k1_fe_storage_t *r, const secp256k1_fe_t *a) { /* packs the ten limbs of a, 26 bits apart, into the eight words of r (presumably 32-bit words; the storage type is declared outside this view) */
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized); /* the packing ORs limbs together at 26-bit offsets, so a limb wider than 26 bits would corrupt its neighbour */
+#endif
+    r->n[0] = a->n[0] | a->n[1] << 26;
+    r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
+    r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
+    r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
+    r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28; /* the only word that takes bits from three limbs (2 + 26 + 4) */
+    r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
+    r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
+    r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
+}
+
+static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe_t *r, const secp256k1_fe_storage_t *a) { /* unpacks the eight storage words of a into ten limbs of r, each masked to 26 bits (0x3FFFFFF) */
+    r->n[0] = a->n[0] & 0x3FFFFFFUL;
+    r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
+    r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
+    r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
+    r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
+    r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL; /* this limb lies entirely inside word 4 */
+    r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
+    r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
+    r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
+    r->n[9] = a->n[7] >> 10; /* top limb: the remaining high bits of word 7, no mask applied */
+#ifdef VERIFY
+    r->magnitude = 1; /* the result is tagged magnitude 1 and normalized; the stored value itself is not re-checked here */
+    r->normalized = 1;
+#endif
+}
+
+#endif
diff --git a/secp256k1/field_5x52.h b/secp256k1/field_5x52.h
index 9d5de2cc4..4513d36f4 100644
--- a/secp256k1/field_5x52.h
+++ b/secp256k1/field_5x52.h
@@ -1,6 +1,8 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
 
 #ifndef _SECP256K1_FIELD_REPR_
 #define _SECP256K1_FIELD_REPR_
@@ -8,7 +10,7 @@
 #include <stdint.h>
 
 typedef struct {
-    // X = sum(i=0..4, elem[i]*2^52) mod n
+    /* X = sum(i=0..4, elem[i]*2^(52*i)) mod n */
     uint64_t n[5];
 #ifdef VERIFY
     int magnitude;
@@ -16,4 +18,30 @@ typedef struct {
 #endif
 } secp256k1_fe_t;
 
+/* Unpacks a constant into an overlapping multi-limbed FE element: the eight words d7 (most significant) .. d0 (assumed to be 32 bits each) are repacked into five limbs, lowest first, holding 52, 52, 52, 52 and 48 bits. */
+#define SECP256K1_FE_CONST_INNER(d7, d6, d5, d4, d3, d2, d1, d0) { \
+    (d0) | ((uint64_t)(d1) & 0xFFFFFUL) << 32, \
+    ((d1) >> 20) | ((uint64_t)(d2)) << 12 | ((uint64_t)(d3) & 0xFFUL) << 44, \
+    ((d3) >> 8) | ((uint64_t)(d4) & 0xFFFFFFFUL) << 24, \
+    ((d4) >> 28) | ((uint64_t)(d5)) << 4 | ((uint64_t)(d6) & 0xFFFFUL) << 36, \
+    ((d6) >> 16) | ((uint64_t)(d7)) << 16 \
+}
+
+#ifdef VERIFY /* VERIFY builds append two extra initializers (both 1) after the limb array -- presumably the struct's magnitude and normalized fields */
+#define SECP256K1_FE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {SECP256K1_FE_CONST_INNER((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0)), 1, 1}
+#else /* without VERIFY only the limb array is initialized */
+#define SECP256K1_FE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {SECP256K1_FE_CONST_INNER((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0))}
+#endif
+
+typedef struct {
+    uint64_t n[4]; /* four 64-bit words; SECP256K1_FE_STORAGE_CONST fills n[0] from d1:d0 up to n[3] from d7:d6 */
+} secp256k1_fe_storage_t;
+
+#define SECP256K1_FE_STORAGE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{ /* each pair of words (assumed 32-bit) is joined as low | high << 32, lowest pair first */ \
+    (d0) | ((uint64_t)(d1)) << 32, \
+    (d2) | ((uint64_t)(d3)) << 32, \
+    (d4) | ((uint64_t)(d5)) << 32, \
+    (d6) | ((uint64_t)(d7)) << 32 \
+}}
+
 #endif
diff --git a/secp256k1/field_5x52_asm_impl.h b/secp256k1/field_5x52_asm_impl.h
new file mode 100644
index 000000000..98cc004bf
--- /dev/null
+++ b/secp256k1/field_5x52_asm_impl.h
@@ -0,0 +1,502 @@
+/**********************************************************************
+ * Copyright (c) 2013-2014 Diederik Huys, Pieter Wuille               *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+/**
+ * Changelog:
+ * - March 2013, Diederik Huys:    original version
+ * - November 2014, Pieter Wuille: updated to use Peter Dettman's parallel multiplication algorithm
+ * - December 2014, Pieter Wuille: converted from YASM to GCC inline assembly
+ */
+
+#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_
+#define _SECP256K1_FIELD_INNER5X52_IMPL_H_
+
+SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
+/**
+ * Registers: rdx:rax = multiplication accumulator
+ *            r9:r8   = c
+ *            r15:rcx = d
+ *            r10-r14 = a0-a4 (r10 is reloaded with t3 after its last use as a0)
+ *            rbx     = b
+ *            rdi     = r
+ *            rsi     = a / t? (only needed as a pointer for the five initial loads, then reused as scratch)
+ */
+  uint64_t tmp1, tmp2, tmp3; /* spill slots (asm operands %q1..%q3): tmp1 = t3, tmp2 = t4 & (M >> 4), tmp3 = tx = t4 >> 48 */
+__asm__ __volatile__(
+    "movq 0(%%rsi),%%r10\n"
+    "movq 8(%%rsi),%%r11\n"
+    "movq 16(%%rsi),%%r12\n"
+    "movq 24(%%rsi),%%r13\n"
+    "movq 32(%%rsi),%%r14\n"
+
+    /* d = a3 * b0 (initializes r15:rcx) */
+    "movq 0(%%rbx),%%rax\n"
+    "mulq %%r13\n"
+    "movq %%rax,%%rcx\n"
+    "movq %%rdx,%%r15\n"
+    /* d += a2 * b1 */
+    "movq 8(%%rbx),%%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* d += a1 * b2 */
+    "movq 16(%%rbx),%%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* d += a0 * b3 */
+    "movq 24(%%rbx),%%rax\n"
+    "mulq %%r10\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* c = a4 * b4 */
+    "movq 32(%%rbx),%%rax\n"
+    "mulq %%r14\n"
+    "movq %%rax,%%r8\n"
+    "movq %%rdx,%%r9\n"
+    /* d += (c & M) * R */
+    "movq $0xfffffffffffff,%%rdx\n"
+    "andq %%rdx,%%rax\n"
+    "movq $0x1000003d10,%%rdx\n"
+    "mulq %%rdx\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* c >>= 52 (%%r8 only) */
+    "shrdq $52,%%r9,%%r8\n"
+    /* t3 (tmp1) = d & M */
+    "movq %%rcx,%%rsi\n"
+    "movq $0xfffffffffffff,%%rdx\n"
+    "andq %%rdx,%%rsi\n"
+    "movq %%rsi,%q1\n"
+    /* d >>= 52 */
+    "shrdq $52,%%r15,%%rcx\n"
+    "xorq %%r15,%%r15\n"
+    /* d += a4 * b0 */
+    "movq 0(%%rbx),%%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* d += a3 * b1 */
+    "movq 8(%%rbx),%%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* d += a2 * b2 */
+    "movq 16(%%rbx),%%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* d += a1 * b3 */
+    "movq 24(%%rbx),%%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* d += a0 * b4 */
+    "movq 32(%%rbx),%%rax\n"
+    "mulq %%r10\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* d += c * R */
+    "movq %%r8,%%rax\n"
+    "movq $0x1000003d10,%%rdx\n"
+    "mulq %%rdx\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* t4 = d & M (%%rsi) */
+    "movq %%rcx,%%rsi\n"
+    "movq $0xfffffffffffff,%%rdx\n"
+    "andq %%rdx,%%rsi\n"
+    /* d >>= 52 */
+    "shrdq $52,%%r15,%%rcx\n"
+    "xorq %%r15,%%r15\n"
+    /* tx = t4 >> 48 (tmp3) */
+    "movq %%rsi,%%rax\n"
+    "shrq $48,%%rax\n"
+    "movq %%rax,%q3\n"
+    /* t4 &= (M >> 4) (tmp2) */
+    "movq $0xffffffffffff,%%rax\n"
+    "andq %%rax,%%rsi\n"
+    "movq %%rsi,%q2\n"
+    /* c = a0 * b0 */
+    "movq 0(%%rbx),%%rax\n"
+    "mulq %%r10\n"
+    "movq %%rax,%%r8\n"
+    "movq %%rdx,%%r9\n"
+    /* d += a4 * b1 */
+    "movq 8(%%rbx),%%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* d += a3 * b2 */
+    "movq 16(%%rbx),%%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* d += a2 * b3 */
+    "movq 24(%%rbx),%%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* d += a1 * b4 */
+    "movq 32(%%rbx),%%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* u0 = d & M (%%rsi) */
+    "movq %%rcx,%%rsi\n"
+    "movq $0xfffffffffffff,%%rdx\n"
+    "andq %%rdx,%%rsi\n"
+    /* d >>= 52 */
+    "shrdq $52,%%r15,%%rcx\n"
+    "xorq %%r15,%%r15\n"
+    /* u0 = (u0 << 4) | tx (%%rsi) */
+    "shlq $4,%%rsi\n"
+    "movq %q3,%%rax\n"
+    "orq %%rax,%%rsi\n"
+    /* c += u0 * (R >> 4) */
+    "movq $0x1000003d1,%%rax\n"
+    "mulq %%rsi\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* r[0] = c & M */
+    "movq %%r8,%%rax\n"
+    "movq $0xfffffffffffff,%%rdx\n"
+    "andq %%rdx,%%rax\n"
+    "movq %%rax,0(%%rdi)\n"
+    /* c >>= 52 */
+    "shrdq $52,%%r9,%%r8\n"
+    "xorq %%r9,%%r9\n"
+    /* c += a1 * b0 */
+    "movq 0(%%rbx),%%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* c += a0 * b1 */
+    "movq 8(%%rbx),%%rax\n"
+    "mulq %%r10\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* d += a4 * b2 */
+    "movq 16(%%rbx),%%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* d += a3 * b3 */
+    "movq 24(%%rbx),%%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* d += a2 * b4 */
+    "movq 32(%%rbx),%%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* c += (d & M) * R */
+    "movq %%rcx,%%rax\n"
+    "movq $0xfffffffffffff,%%rdx\n"
+    "andq %%rdx,%%rax\n"
+    "movq $0x1000003d10,%%rdx\n"
+    "mulq %%rdx\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* d >>= 52 */
+    "shrdq $52,%%r15,%%rcx\n"
+    "xorq %%r15,%%r15\n"
+    /* r[1] = c & M */
+    "movq %%r8,%%rax\n"
+    "movq $0xfffffffffffff,%%rdx\n"
+    "andq %%rdx,%%rax\n"
+    "movq %%rax,8(%%rdi)\n"
+    /* c >>= 52 */
+    "shrdq $52,%%r9,%%r8\n"
+    "xorq %%r9,%%r9\n"
+    /* c += a2 * b0 */
+    "movq 0(%%rbx),%%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* c += a1 * b1 */
+    "movq 8(%%rbx),%%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* c += a0 * b2 (last use of %%r10 = a0) */
+    "movq 16(%%rbx),%%rax\n"
+    "mulq %%r10\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* fetch t3 (%%r10, overwrites a0), t4 (%%rsi) */
+    "movq %q2,%%rsi\n"
+    "movq %q1,%%r10\n"
+    /* d += a4 * b3 */
+    "movq 24(%%rbx),%%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* d += a3 * b4 */
+    "movq 32(%%rbx),%%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax,%%rcx\n"
+    "adcq %%rdx,%%r15\n"
+    /* c += (d & M) * R */
+    "movq %%rcx,%%rax\n"
+    "movq $0xfffffffffffff,%%rdx\n"
+    "andq %%rdx,%%rax\n"
+    "movq $0x1000003d10,%%rdx\n"
+    "mulq %%rdx\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* d >>= 52 (%%rcx only) */
+    "shrdq $52,%%r15,%%rcx\n"
+    /* r[2] = c & M */
+    "movq %%r8,%%rax\n"
+    "movq $0xfffffffffffff,%%rdx\n"
+    "andq %%rdx,%%rax\n"
+    "movq %%rax,16(%%rdi)\n"
+    /* c >>= 52 */
+    "shrdq $52,%%r9,%%r8\n"
+    "xorq %%r9,%%r9\n"
+    /* c += t3 */
+    "addq %%r10,%%r8\n"
+    /* c += d * R */
+    "movq %%rcx,%%rax\n"
+    "movq $0x1000003d10,%%rdx\n"
+    "mulq %%rdx\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* r[3] = c & M */
+    "movq %%r8,%%rax\n"
+    "movq $0xfffffffffffff,%%rdx\n"
+    "andq %%rdx,%%rax\n"
+    "movq %%rax,24(%%rdi)\n"
+    /* c >>= 52 (%%r8 only) */
+    "shrdq $52,%%r9,%%r8\n"
+    /* c += t4 (%%r8 only) */
+    "addq %%rsi,%%r8\n"
+    /* r[4] = c */
+    "movq %%r8,32(%%rdi)\n"
+: "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3)
+: "b"(b), "D"(r)
+: "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
+);
+}
+
+SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) {
+/**
+ * Registers: rdx:rax = multiplication accumulator
+ *            r9:r8   = c
+ *            rcx:rbx = d
+ *            r10-r14 = a0-a4 (r14 is doubled in place after t3 is stored, r10 after r[0] is stored; r10 later holds t3)
+ *            r15     = M (0xfffffffffffff)
+ *            rdi     = r
+ *            rsi     = a / t? (only needed as a pointer for the five initial loads, then reused as scratch)
+ */
+  uint64_t tmp1, tmp2, tmp3; /* spill slots (asm operands %q1..%q3): tmp1 = t3, tmp2 = t4 & (M >> 4), tmp3 = tx = t4 >> 48 */
+__asm__ __volatile__(
+    "movq 0(%%rsi),%%r10\n"
+    "movq 8(%%rsi),%%r11\n"
+    "movq 16(%%rsi),%%r12\n"
+    "movq 24(%%rsi),%%r13\n"
+    "movq 32(%%rsi),%%r14\n"
+    "movq $0xfffffffffffff,%%r15\n"
+
+    /* d = (a0*2) * a3 */
+    "leaq (%%r10,%%r10,1),%%rax\n"
+    "mulq %%r13\n"
+    "movq %%rax,%%rbx\n"
+    "movq %%rdx,%%rcx\n"
+    /* d += (a1*2) * a2 */
+    "leaq (%%r11,%%r11,1),%%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax,%%rbx\n"
+    "adcq %%rdx,%%rcx\n"
+    /* c = a4 * a4 */
+    "movq %%r14,%%rax\n"
+    "mulq %%r14\n"
+    "movq %%rax,%%r8\n"
+    "movq %%rdx,%%r9\n"
+    /* d += (c & M) * R */
+    "andq %%r15,%%rax\n"
+    "movq $0x1000003d10,%%rdx\n"
+    "mulq %%rdx\n"
+    "addq %%rax,%%rbx\n"
+    "adcq %%rdx,%%rcx\n"
+    /* c >>= 52 (%%r8 only) */
+    "shrdq $52,%%r9,%%r8\n"
+    /* t3 (tmp1) = d & M */
+    "movq %%rbx,%%rsi\n"
+    "andq %%r15,%%rsi\n"
+    "movq %%rsi,%q1\n"
+    /* d >>= 52 */
+    "shrdq $52,%%rcx,%%rbx\n"
+    "xorq %%rcx,%%rcx\n"
+    /* a4 *= 2 (from here on %%r14 holds 2*a4) */
+    "addq %%r14,%%r14\n"
+    /* d += a0 * a4 */
+    "movq %%r10,%%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax,%%rbx\n"
+    "adcq %%rdx,%%rcx\n"
+    /* d += (a1*2) * a3 */
+    "leaq (%%r11,%%r11,1),%%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax,%%rbx\n"
+    "adcq %%rdx,%%rcx\n"
+    /* d += a2 * a2 */
+    "movq %%r12,%%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax,%%rbx\n"
+    "adcq %%rdx,%%rcx\n"
+    /* d += c * R */
+    "movq %%r8,%%rax\n"
+    "movq $0x1000003d10,%%rdx\n"
+    "mulq %%rdx\n"
+    "addq %%rax,%%rbx\n"
+    "adcq %%rdx,%%rcx\n"
+    /* t4 = d & M (%%rsi) */
+    "movq %%rbx,%%rsi\n"
+    "andq %%r15,%%rsi\n"
+    /* d >>= 52 */
+    "shrdq $52,%%rcx,%%rbx\n"
+    "xorq %%rcx,%%rcx\n"
+    /* tx = t4 >> 48 (tmp3) */
+    "movq %%rsi,%%rax\n"
+    "shrq $48,%%rax\n"
+    "movq %%rax,%q3\n"
+    /* t4 &= (M >> 4) (tmp2) */
+    "movq $0xffffffffffff,%%rax\n"
+    "andq %%rax,%%rsi\n"
+    "movq %%rsi,%q2\n"
+    /* c = a0 * a0 */
+    "movq %%r10,%%rax\n"
+    "mulq %%r10\n"
+    "movq %%rax,%%r8\n"
+    "movq %%rdx,%%r9\n"
+    /* d += a1 * a4 */
+    "movq %%r11,%%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax,%%rbx\n"
+    "adcq %%rdx,%%rcx\n"
+    /* d += (a2*2) * a3 */
+    "leaq (%%r12,%%r12,1),%%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax,%%rbx\n"
+    "adcq %%rdx,%%rcx\n"
+    /* u0 = d & M (%%rsi) */
+    "movq %%rbx,%%rsi\n"
+    "andq %%r15,%%rsi\n"
+    /* d >>= 52 */
+    "shrdq $52,%%rcx,%%rbx\n"
+    "xorq %%rcx,%%rcx\n"
+    /* u0 = (u0 << 4) | tx (%%rsi) */
+    "shlq $4,%%rsi\n"
+    "movq %q3,%%rax\n"
+    "orq %%rax,%%rsi\n"
+    /* c += u0 * (R >> 4) */
+    "movq $0x1000003d1,%%rax\n"
+    "mulq %%rsi\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* r[0] = c & M */
+    "movq %%r8,%%rax\n"
+    "andq %%r15,%%rax\n"
+    "movq %%rax,0(%%rdi)\n"
+    /* c >>= 52 */
+    "shrdq $52,%%r9,%%r8\n"
+    "xorq %%r9,%%r9\n"
+    /* a0 *= 2 (from here on %%r10 holds 2*a0) */
+    "addq %%r10,%%r10\n"
+    /* c += a0 * a1 */
+    "movq %%r10,%%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* d += a2 * a4 */
+    "movq %%r12,%%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax,%%rbx\n"
+    "adcq %%rdx,%%rcx\n"
+    /* d += a3 * a3 */
+    "movq %%r13,%%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax,%%rbx\n"
+    "adcq %%rdx,%%rcx\n"
+    /* c += (d & M) * R */
+    "movq %%rbx,%%rax\n"
+    "andq %%r15,%%rax\n"
+    "movq $0x1000003d10,%%rdx\n"
+    "mulq %%rdx\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* d >>= 52 */
+    "shrdq $52,%%rcx,%%rbx\n"
+    "xorq %%rcx,%%rcx\n"
+    /* r[1] = c & M */
+    "movq %%r8,%%rax\n"
+    "andq %%r15,%%rax\n"
+    "movq %%rax,8(%%rdi)\n"
+    /* c >>= 52 */
+    "shrdq $52,%%r9,%%r8\n"
+    "xorq %%r9,%%r9\n"
+    /* c += a0 * a2 (last use of %%r10) */
+    "movq %%r10,%%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* fetch t3 (%%r10, overwrites a0),t4 (%%rsi) */
+    "movq %q2,%%rsi\n"
+    "movq %q1,%%r10\n"
+    /* c += a1 * a1 */
+    "movq %%r11,%%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* d += a3 * a4 */
+    "movq %%r13,%%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax,%%rbx\n"
+    "adcq %%rdx,%%rcx\n"
+    /* c += (d & M) * R */
+    "movq %%rbx,%%rax\n"
+    "andq %%r15,%%rax\n"
+    "movq $0x1000003d10,%%rdx\n"
+    "mulq %%rdx\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* d >>= 52 (%%rbx only) */
+    "shrdq $52,%%rcx,%%rbx\n"
+    /* r[2] = c & M */
+    "movq %%r8,%%rax\n"
+    "andq %%r15,%%rax\n"
+    "movq %%rax,16(%%rdi)\n"
+    /* c >>= 52 */
+    "shrdq $52,%%r9,%%r8\n"
+    "xorq %%r9,%%r9\n"
+    /* c += t3 */
+    "addq %%r10,%%r8\n"
+    /* c += d * R */
+    "movq %%rbx,%%rax\n"
+    "movq $0x1000003d10,%%rdx\n"
+    "mulq %%rdx\n"
+    "addq %%rax,%%r8\n"
+    "adcq %%rdx,%%r9\n"
+    /* r[3] = c & M */
+    "movq %%r8,%%rax\n"
+    "andq %%r15,%%rax\n"
+    "movq %%rax,24(%%rdi)\n"
+    /* c >>= 52 (%%r8 only) */
+    "shrdq $52,%%r9,%%r8\n"
+    /* c += t4 (%%r8 only) */
+    "addq %%rsi,%%r8\n"
+    /* r[4] = c */
+    "movq %%r8,32(%%rdi)\n"
+: "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3)
+: "D"(r)
+: "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
+);
+}
+
+#endif
diff --git a/secp256k1/field_5x52_impl.h b/secp256k1/field_5x52_impl.h
new file mode 100644
index 000000000..bda4c3dfc
--- /dev/null
+++ b/secp256k1/field_5x52_impl.h
@@ -0,0 +1,454 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_REPR_IMPL_H_
+#define _SECP256K1_FIELD_REPR_IMPL_H_
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#include <string.h>
+#include "util.h"
+#include "num.h"
+#include "field.h"
+
+#if defined(USE_ASM_X86_64)
+#include "field_5x52_asm_impl.h"
+#else
+#include "field_5x52_int128_impl.h"
+#endif
+
+/** Implements arithmetic modulo FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F,
+ *  represented as 5 uint64_t's in base 2^52. The limbs are allowed to contain more than 52 bits each. In particular,
+ *  each FieldElem has a 'magnitude' associated with it. Internally, a magnitude M means each element
+ *  is at most M*(2^53-1), except the most significant one, which is limited to M*(2^49-1). All operations
+ *  accept any input with magnitude at most M, and have different rules for propagating magnitude to their
+ *  output.
+ */
+
+#ifdef VERIFY
+static void secp256k1_fe_verify(const secp256k1_fe_t *a) { /* Debug-only consistency check of a's limbs against its magnitude/normalized flags. */
+    const uint64_t *d = a->n;
+    int m = a->normalized ? 1 : 2 * a->magnitude, r = 1; /* m: allowed multiple of a fully reduced limb; r: running "all checks passed" flag */
+    /* Limb bounds: limbs 0..3 may hold at most m * (2^52 - 1), the top limb at most m * (2^48 - 1). */
+    r &= (d[0] <= 0xFFFFFFFFFFFFFULL * m);
+    r &= (d[1] <= 0xFFFFFFFFFFFFFULL * m);
+    r &= (d[2] <= 0xFFFFFFFFFFFFFULL * m);
+    r &= (d[3] <= 0xFFFFFFFFFFFFFULL * m);
+    r &= (d[4] <= 0x0FFFFFFFFFFFFULL * m);
+    r &= (a->magnitude >= 0);
+    r &= (a->magnitude <= 2048);
+    if (a->normalized) {
+        r &= (a->magnitude <= 1);
+        if (r && (d[4] == 0x0FFFFFFFFFFFFULL) && ((d[3] & d[2] & d[1]) == 0xFFFFFFFFFFFFFULL)) { /* upper limbs equal those of the prime p (SEC2 2.7.1) */
+            r &= (d[0] < 0xFFFFEFFFFFC2FULL); /* ... so the low limb must be strictly below p's low limb */
+        }
+    }
+    VERIFY_CHECK(r == 1);
+}
+#else
+static void secp256k1_fe_verify(const secp256k1_fe_t *a) { /* No-op variant used when VERIFY is not defined. */
+    (void)a;
+}
+#endif
+
+static void secp256k1_fe_normalize(secp256k1_fe_t *r) {
+    uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
+    /* Fully reduces r in place without data-dependent branches: the final reduction is always executed, scaled by x (0 or 1). */
+    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
+    uint64_t m;
+    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
+
+    /* The first pass ensures the magnitude is 1, ... (0x1000003D1 is 2^256 - p, so x * 2^256 folds back as x * 0x1000003D1) */
+    t0 += x * 0x1000003D1ULL;
+    t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; m = t1;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; m &= t2;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; m &= t3;
+
+    /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t4 >> 49 == 0);
+
+    /* At most a single final reduction is needed; check if the value is >= the field characteristic (m is the AND of limbs 1..3) */
+    x = (t4 >> 48) | ((t4 == 0x0FFFFFFFFFFFFULL) & (m == 0xFFFFFFFFFFFFFULL)
+        & (t0 >= 0xFFFFEFFFFFC2FULL));
+
+    /* Apply the final reduction (for constant-time behaviour, we do it always) */
+    t0 += x * 0x1000003D1ULL;
+    t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL;
+
+    /* If t4 didn't carry to bit 48 already, then it should have after any final reduction */
+    VERIFY_CHECK(t4 >> 48 == x);
+
+    /* Mask off the possible multiple of 2^256 from the final reduction */
+    t4 &= 0x0FFFFFFFFFFFFULL;
+
+    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
+
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r) {
+    uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
+    /* Partial reduction of r in place: only the first carry pass is run, and no comparison against the prime is made. */
+    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
+    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
+
+    /* The first pass ensures the magnitude is 1, ... (0x1000003D1 is 2^256 - p) */
+    t0 += x * 0x1000003D1ULL;
+    t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL;
+
+    /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t4 >> 49 == 0);
+
+    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; /* t4 is stored with its possible bit-48 carry left in place */
+
+#ifdef VERIFY
+    r->magnitude = 1; /* only the magnitude is updated here; the normalized flag is left as it was */
+    secp256k1_fe_verify(r);
+#endif
+}
+
+static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) {
+    uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
+    /* Fully reduces r in place; unlike secp256k1_fe_normalize, the final reduction is skipped by a branch when not needed. */
+    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
+    uint64_t m;
+    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
+
+    /* The first pass ensures the magnitude is 1, ... (0x1000003D1 is 2^256 - p) */
+    t0 += x * 0x1000003D1ULL;
+    t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; m = t1;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; m &= t2;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; m &= t3;
+
+    /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t4 >> 49 == 0);
+
+    /* At most a single final reduction is needed; check if the value is >= the field characteristic (m is the AND of limbs 1..3) */
+    x = (t4 >> 48) | ((t4 == 0x0FFFFFFFFFFFFULL) & (m == 0xFFFFFFFFFFFFFULL)
+        & (t0 >= 0xFFFFEFFFFFC2FULL));
+
+    if (x) { /* data-dependent branch: execution differs depending on whether a final reduction is required */
+        t0 += 0x1000003D1ULL;
+        t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
+        t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL;
+        t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL;
+        t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL;
+
+        /* If t4 didn't carry to bit 48 already, then it should have after any final reduction */
+        VERIFY_CHECK(t4 >> 48 == x);
+
+        /* Mask off the possible multiple of 2^256 from the final reduction */
+        t4 &= 0x0FFFFFFFFFFFFULL;
+    }
+
+    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
+
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) {
+    uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
+    /* Returns 1 if r would reduce to zero, 0 otherwise. Works on local copies of the limbs only; r itself is not written. */
+    /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
+    uint64_t z0, z1;
+
+    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
+    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
+
+    /* The first pass ensures the magnitude is 1, ... (z0 ORs all limbs; z1 ANDs limbs after XORing t0 and t4 so that P maps to all-ones) */
+    t0 += x * 0x1000003D1ULL;
+    t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; z0  = t0; z1  = t0 ^ 0x1000003D0ULL;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; z0 |= t1; z1 &= t1;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; z0 |= t2; z1 &= t2;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; z0 |= t3; z1 &= t3;
+                                                z0 |= t4; z1 &= t4 ^ 0xF000000000000ULL;
+
+    /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t4 >> 49 == 0);
+
+    return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL); /* raw value is 0, or raw value is P */
+}
+
+static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r) {
+    uint64_t t0, t1, t2, t3, t4;
+    uint64_t z0, z1;
+    uint64_t x;
+    /* Returns 1 if r would reduce to zero, 0 otherwise; has an early exit that depends on the value. r is only read. */
+    t0 = r->n[0];
+    t4 = r->n[4];
+
+    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
+    x = t4 >> 48;
+
+    /* The first pass ensures the magnitude is 1, ... (0x1000003D1 is 2^256 - p) */
+    t0 += x * 0x1000003D1ULL;
+
+    /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
+    z0 = t0 & 0xFFFFFFFFFFFFFULL;
+    z1 = z0 ^ 0x1000003D0ULL;
+
+    /* Fast return path should catch the majority of cases: the low limb already rules out both 0 and P */
+    if ((z0 != 0ULL) & (z1 != 0xFFFFFFFFFFFFFULL)) {
+        return 0;
+    }
+
+    t1 = r->n[1];
+    t2 = r->n[2];
+    t3 = r->n[3];
+
+    t4 &= 0x0FFFFFFFFFFFFULL; /* the bits above bit 47 were already folded into t0 via x */
+
+    t1 += (t0 >> 52); t0  = z0;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; z0 |= t1; z1 &= t1;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; z0 |= t2; z1 &= t2;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; z0 |= t3; z1 &= t3;
+                                                z0 |= t4; z1 &= t4 ^ 0xF000000000000ULL;
+
+    /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t4 >> 49 == 0);
+
+    return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL); /* raw value is 0, or raw value is P */
+}
+
+SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) { /* Sets r to the integer a. */
+    r->n[0] = a; /* NOTE(review): no range check; presumably callers only pass small non-negative values - confirm */
+    r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) { /* Returns 1 if every limb of a is zero, else 0. */
+    const uint64_t *t = a->n;
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized); /* the raw-limb test below is only checked against normalized inputs */
+    secp256k1_fe_verify(a);
+#endif
+    return (t[0] | t[1] | t[2] | t[3] | t[4]) == 0; /* OR of all limbs, so no per-limb branching */
+}
+
+SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe_t *a) { /* Returns the lowest bit of a (1 if odd, 0 if even). */
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized); /* parity of the raw limb is only checked against normalized inputs */
+    secp256k1_fe_verify(a);
+#endif
+    return a->n[0] & 1;
+}
+
+SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) {
+    /* Zero all five limbs of a; under VERIFY the element is tagged magnitude 0 and normalized. */
+#ifdef VERIFY
+    a->magnitude = 0;
+    a->normalized = 1;
+#endif
+    a->n[0] = 0;
+    a->n[1] = 0;
+    a->n[2] = a->n[3] = a->n[4] = 0;
+}
+
+static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
+    int limb = 5;
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized);
+    VERIFY_CHECK(b->normalized);
+    secp256k1_fe_verify(a);
+    secp256k1_fe_verify(b);
+#endif
+    /* Three-way compare: walk the limbs from most to least significant; the first difference decides. */
+    while (limb-- > 0) {
+        const uint64_t lhs = a->n[limb];
+        const uint64_t rhs = b->n[limb];
+        if (lhs != rhs) {
+            return (lhs > rhs) ? 1 : -1;
+        }
+    }
+    return 0;
+}
+
+static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) { /* Loads 32 big-endian bytes from a into r; returns 0 if the value is >= p, else 1. */
+    int i;
+    r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
+    for (i=0; i<32; i++) { /* i counts bytes from the least significant one, a[31] */
+        int j;
+        for (j=0; j<2; j++) { /* j selects the low (0) or high (1) nibble of that byte */
+            int limb = (8*i+4*j)/52; /* 52 is a multiple of 4, so a nibble never straddles two limbs */
+            int shift = (8*i+4*j)%52;
+            r->n[limb] |= (uint64_t)((a[31-i] >> (4*j)) & 0xF) << shift;
+        }
+    }
+    if (r->n[4] == 0x0FFFFFFFFFFFFULL && (r->n[3] & r->n[2] & r->n[1]) == 0xFFFFFFFFFFFFFULL && r->n[0] >= 0xFFFFEFFFFFC2FULL) {
+        return 0; /* value >= p: rejected; r keeps the unreduced limbs and the VERIFY fields are not set on this path */
+    }
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 1;
+    secp256k1_fe_verify(r);
+#endif
+    return 1;
+}
+
+/** Convert a field element to a 32-byte big endian value written to r[0..31]. Requires the input to be normalized */
+static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
+    int i;
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized);
+    secp256k1_fe_verify(a);
+#endif
+    for (i=0; i<32; i++) { /* i counts output bytes from the least significant one, r[31] */
+        int j;
+        int c = 0;
+        for (j=0; j<2; j++) { /* assemble the byte from its low (j=0) and high (j=1) nibble */
+            int limb = (8*i+4*j)/52; /* 52 is a multiple of 4, so a nibble never straddles two limbs */
+            int shift = (8*i+4*j)%52;
+            c |= ((a->n[limb] >> shift) & 0xF) << (4 * j);
+        }
+        r[31-i] = c;
+    }
+}
+
+SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m) { /* r = 2*(m+1)*p - a, limb by limb */
+#ifdef VERIFY
+    VERIFY_CHECK(a->magnitude <= m); /* m must be an upper bound on a's magnitude */
+    secp256k1_fe_verify(a);
+#endif
+    r->n[0] = 0xFFFFEFFFFFC2FULL * 2 * (m + 1) - a->n[0]; /* the constants are the limbs of p, scaled by 2*(m+1) */
+    r->n[1] = 0xFFFFFFFFFFFFFULL * 2 * (m + 1) - a->n[1];
+    r->n[2] = 0xFFFFFFFFFFFFFULL * 2 * (m + 1) - a->n[2];
+    r->n[3] = 0xFFFFFFFFFFFFFULL * 2 * (m + 1) - a->n[3];
+    r->n[4] = 0x0FFFFFFFFFFFFULL * 2 * (m + 1) - a->n[4];
+#ifdef VERIFY
+    r->magnitude = m + 1; /* the result's magnitude is one more than the bound passed in */
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+SECP256K1_INLINE static void secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) { /* Multiplies every limb of r by a, with no carry propagation or reduction. */
+    r->n[0] *= a; /* NOTE(review): presumably a is a small non-negative integer - confirm against callers */
+    r->n[1] *= a;
+    r->n[2] *= a;
+    r->n[3] *= a;
+    r->n[4] *= a;
+#ifdef VERIFY
+    r->magnitude *= a; /* magnitude scales by the same factor */
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) { /* r += a, limb by limb, with no carry propagation or reduction */
+#ifdef VERIFY
+    secp256k1_fe_verify(a);
+#endif
+    r->n[0] += a->n[0];
+    r->n[1] += a->n[1];
+    r->n[2] += a->n[2];
+    r->n[3] += a->n[3];
+    r->n[4] += a->n[4];
+#ifdef VERIFY
+    r->magnitude += a->magnitude; /* magnitudes add up */
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b) { /* r = a * b via secp256k1_fe_mul_inner */
+#ifdef VERIFY
+    VERIFY_CHECK(a->magnitude <= 8); /* both inputs must have magnitude at most 8 */
+    VERIFY_CHECK(b->magnitude <= 8);
+    secp256k1_fe_verify(a);
+    secp256k1_fe_verify(b);
+    VERIFY_CHECK(r != b); /* the output must not alias b (only this aliasing is checked) */
+#endif
+    secp256k1_fe_mul_inner(r->n, a->n, b->n);
+#ifdef VERIFY
+    r->magnitude = 1; /* result is tagged magnitude 1 but not normalized */
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) { /* r = a * a via secp256k1_fe_sqr_inner */
+#ifdef VERIFY
+    VERIFY_CHECK(a->magnitude <= 8); /* the input must have magnitude at most 8 */
+    secp256k1_fe_verify(a);
+#endif
+    secp256k1_fe_sqr_inner(r->n, a->n);
+#ifdef VERIFY
+    r->magnitude = 1; /* result is tagged magnitude 1 but not normalized */
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag) { /* Branch-free select: r = a when flag is 1, r unchanged when flag is 0. */
+    uint64_t mask0, mask1;
+    mask0 = flag + ~((uint64_t)0); /* flag - 1 mod 2^64: all ones for flag == 0, zero for flag == 1 (other flag values are not handled) */
+    mask1 = ~mask0;
+    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
+    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
+    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
+    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
+    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
+#ifdef VERIFY
+    r->magnitude = (r->magnitude & mask0) | (a->magnitude & mask1); /* the VERIFY bookkeeping is selected with the same masks */
+    r->normalized = (r->normalized & mask0) | (a->normalized & mask1);
+#endif
+}
+
+static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag) { /* Branch-free select on the 4-word storage form. */
+    uint64_t mask0, mask1;
+    mask0 = flag + ~((uint64_t)0); /* flag - 1 mod 2^64: all ones for flag == 0, zero for flag == 1 (other flag values are not handled) */
+    mask1 = ~mask0;
+    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); /* keeps r's word when flag == 0, takes a's word when flag == 1 */
+    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
+    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
+    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
+}
+
+static void secp256k1_fe_to_storage(secp256k1_fe_storage_t *r, const secp256k1_fe_t *a) { /* Packs the five 52-bit limbs of a into four 64-bit words. */
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized); /* the packing below assumes limbs 0..3 fit in 52 bits and limb 4 in 48 bits */
+#endif
+    r->n[0] = a->n[0] | a->n[1] << 52; /* bits 0..51 from limb 0, low 12 bits of limb 1 on top */
+    r->n[1] = a->n[1] >> 12 | a->n[2] << 40; /* remaining 40 bits of limb 1, low 24 bits of limb 2 */
+    r->n[2] = a->n[2] >> 24 | a->n[3] << 28; /* remaining 28 bits of limb 2, low 36 bits of limb 3 */
+    r->n[3] = a->n[3] >> 36 | a->n[4] << 16; /* remaining 16 bits of limb 3, then limb 4 */
+}
+
+static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe_t *r, const secp256k1_fe_storage_t *a) { /* Unpacks four 64-bit words into five 52-bit limbs. */
+    r->n[0] = a->n[0] & 0xFFFFFFFFFFFFFULL; /* low 52 bits of word 0 */
+    r->n[1] = a->n[0] >> 52 | ((a->n[1] << 12) & 0xFFFFFFFFFFFFFULL); /* top 12 bits of word 0 + low 40 bits of word 1 */
+    r->n[2] = a->n[1] >> 40 | ((a->n[2] << 24) & 0xFFFFFFFFFFFFFULL); /* top 24 bits of word 1 + low 28 bits of word 2 */
+    r->n[3] = a->n[2] >> 28 | ((a->n[3] << 36) & 0xFFFFFFFFFFFFFULL); /* top 36 bits of word 2 + low 16 bits of word 3 */
+    r->n[4] = a->n[3] >> 16; /* remaining 48 bits of word 3 */
+#ifdef VERIFY
+    r->magnitude = 1; /* flags are set without a secp256k1_fe_verify call; the stored value is trusted to be < p */
+    r->normalized = 1;
+#endif
+}
+
+#endif
diff --git a/secp256k1/field_5x52_int128_impl.h b/secp256k1/field_5x52_int128_impl.h
new file mode 100644
index 000000000..9280bb5ea
--- /dev/null
+++ b/secp256k1/field_5x52_int128_impl.h
@@ -0,0 +1,277 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_
+#define _SECP256K1_FIELD_INNER5X52_IMPL_H_
+
+#include <stdint.h>
+
+#ifdef VERIFY
+#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
+#else
+#define VERIFY_BITS(x, n) do { } while(0)
+#endif
+
+SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
+    uint128_t c, d;
+    uint64_t t3, t4, tx, u0;
+    uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
+    const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
+
+    VERIFY_BITS(a[0], 56);
+    VERIFY_BITS(a[1], 56);
+    VERIFY_BITS(a[2], 56);
+    VERIFY_BITS(a[3], 56);
+    VERIFY_BITS(a[4], 52);
+    VERIFY_BITS(b[0], 56);
+    VERIFY_BITS(b[1], 56);
+    VERIFY_BITS(b[2], 56);
+    VERIFY_BITS(b[3], 56);
+    VERIFY_BITS(b[4], 52);
+    VERIFY_CHECK(r != b);
+    /* Writes the 5x52-limb product of a and b to r. M masks one 52-bit limb; R is the factor used to fold overflow limbs back down. */
+    /*  [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
+     *  px is a shorthand for sum(a[i]*b[x-i], i=0..x).
+     *  Note that [x 0 0 0 0 0] = [x*R].
+     */
+
+    d  = (uint128_t)a0 * b[3]
+       + (uint128_t)a1 * b[2]
+       + (uint128_t)a2 * b[1]
+       + (uint128_t)a3 * b[0];
+    VERIFY_BITS(d, 114);
+    /* [d 0 0 0] = [p3 0 0 0] */
+    c  = (uint128_t)a4 * b[4];
+    VERIFY_BITS(c, 112);
+    /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+    d += (c & M) * R; c >>= 52;
+    VERIFY_BITS(d, 115);
+    VERIFY_BITS(c, 60);
+    /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+    t3 = d & M; d >>= 52;
+    VERIFY_BITS(t3, 52);
+    VERIFY_BITS(d, 63);
+    /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+
+    d += (uint128_t)a0 * b[4]
+       + (uint128_t)a1 * b[3]
+       + (uint128_t)a2 * b[2]
+       + (uint128_t)a3 * b[1]
+       + (uint128_t)a4 * b[0];
+    VERIFY_BITS(d, 115);
+    /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    d += c * R;
+    VERIFY_BITS(d, 116);
+    /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    t4 = d & M; d >>= 52;
+    VERIFY_BITS(t4, 52);
+    VERIFY_BITS(d, 64);
+    /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    tx = (t4 >> 48); t4 &= (M >> 4);
+    VERIFY_BITS(tx, 4);
+    VERIFY_BITS(t4, 48);
+    /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+
+    c  = (uint128_t)a0 * b[0];
+    VERIFY_BITS(c, 112);
+    /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
+    d += (uint128_t)a1 * b[4]
+       + (uint128_t)a2 * b[3]
+       + (uint128_t)a3 * b[2]
+       + (uint128_t)a4 * b[1];
+    VERIFY_BITS(d, 115);
+    /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    u0 = d & M; d >>= 52;
+    VERIFY_BITS(u0, 52);
+    VERIFY_BITS(d, 63);
+    /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    u0 = (u0 << 4) | tx;
+    VERIFY_BITS(u0, 56);
+    /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    c += (uint128_t)u0 * (R >> 4);
+    VERIFY_BITS(c, 115);
+    /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    r[0] = c & M; c >>= 52;
+    VERIFY_BITS(r[0], 52);
+    VERIFY_BITS(c, 61);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
+
+    c += (uint128_t)a0 * b[1]
+       + (uint128_t)a1 * b[0];
+    VERIFY_BITS(c, 114);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
+    d += (uint128_t)a2 * b[4]
+       + (uint128_t)a3 * b[3]
+       + (uint128_t)a4 * b[2];
+    VERIFY_BITS(d, 114);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+    c += (d & M) * R; d >>= 52;
+    VERIFY_BITS(c, 115);
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+    r[1] = c & M; c >>= 52;
+    VERIFY_BITS(r[1], 52);
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+
+    c += (uint128_t)a0 * b[2]
+       + (uint128_t)a1 * b[1]
+       + (uint128_t)a2 * b[0];
+    VERIFY_BITS(c, 114);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint128_t)a3 * b[4]
+       + (uint128_t)a4 * b[3];
+    VERIFY_BITS(d, 114);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c += (d & M) * R; d >>= 52;
+    VERIFY_BITS(c, 115);
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[2] = c & M; c >>= 52;
+    VERIFY_BITS(r[2], 52);
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c   += d * R + t3;
+    VERIFY_BITS(c, 100);
+    /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[3] = c & M; c >>= 52;
+    VERIFY_BITS(r[3], 52);
+    VERIFY_BITS(c, 48);
+    /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c   += t4;
+    VERIFY_BITS(c, 49);
+    /* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[4] = c;
+    VERIFY_BITS(r[4], 49);
+    /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+}
+
+SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) {
+    uint128_t c, d;
+    uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
+    uint64_t t3, t4, tx, u0; /* unsigned, matching secp256k1_fe_mul_inner: these hold masked limbs and are shifted */
+    const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
+
+    VERIFY_BITS(a[0], 56);
+    VERIFY_BITS(a[1], 56);
+    VERIFY_BITS(a[2], 56);
+    VERIFY_BITS(a[3], 56);
+    VERIFY_BITS(a[4], 52);
+    /* Writes the 5x52-limb square of a to r. Symmetric cross terms are computed once with one operand doubled (a0, a4 are doubled in place below). */
+    /**  [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
+     *  px is a shorthand for sum(a[i]*a[x-i], i=0..x).
+     *  Note that [x 0 0 0 0 0] = [x*R].
+     */
+
+    d  = (uint128_t)(a0*2) * a3
+       + (uint128_t)(a1*2) * a2;
+    VERIFY_BITS(d, 114);
+    /* [d 0 0 0] = [p3 0 0 0] */
+    c  = (uint128_t)a4 * a4;
+    VERIFY_BITS(c, 112);
+    /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+    d += (c & M) * R; c >>= 52;
+    VERIFY_BITS(d, 115);
+    VERIFY_BITS(c, 60);
+    /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+    t3 = d & M; d >>= 52;
+    VERIFY_BITS(t3, 52);
+    VERIFY_BITS(d, 63);
+    /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+
+    a4 *= 2;
+    d += (uint128_t)a0 * a4
+       + (uint128_t)(a1*2) * a3
+       + (uint128_t)a2 * a2;
+    VERIFY_BITS(d, 115);
+    /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    d += c * R;
+    VERIFY_BITS(d, 116);
+    /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    t4 = d & M; d >>= 52;
+    VERIFY_BITS(t4, 52);
+    VERIFY_BITS(d, 64);
+    /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    tx = (t4 >> 48); t4 &= (M >> 4);
+    VERIFY_BITS(tx, 4);
+    VERIFY_BITS(t4, 48);
+    /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+
+    c  = (uint128_t)a0 * a0;
+    VERIFY_BITS(c, 112);
+    /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
+    d += (uint128_t)a1 * a4
+       + (uint128_t)(a2*2) * a3;
+    VERIFY_BITS(d, 114);
+    /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    u0 = d & M; d >>= 52;
+    VERIFY_BITS(u0, 52);
+    VERIFY_BITS(d, 62);
+    /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    u0 = (u0 << 4) | tx;
+    VERIFY_BITS(u0, 56);
+    /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    c += (uint128_t)u0 * (R >> 4);
+    VERIFY_BITS(c, 113);
+    /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    r[0] = c & M; c >>= 52;
+    VERIFY_BITS(r[0], 52);
+    VERIFY_BITS(c, 61);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
+
+    a0 *= 2;
+    c += (uint128_t)a0 * a1;
+    VERIFY_BITS(c, 114);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
+    d += (uint128_t)a2 * a4
+       + (uint128_t)a3 * a3;
+    VERIFY_BITS(d, 114);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+    c += (d & M) * R; d >>= 52;
+    VERIFY_BITS(c, 115);
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+    r[1] = c & M; c >>= 52;
+    VERIFY_BITS(r[1], 52);
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+
+    c += (uint128_t)a0 * a2
+       + (uint128_t)a1 * a1;
+    VERIFY_BITS(c, 114);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint128_t)a3 * a4;
+    VERIFY_BITS(d, 114);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c += (d & M) * R; d >>= 52;
+    VERIFY_BITS(c, 115);
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[2] = c & M; c >>= 52;
+    VERIFY_BITS(r[2], 52);
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    c   += d * R + t3;
+    VERIFY_BITS(c, 100);
+    /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[3] = c & M; c >>= 52;
+    VERIFY_BITS(r[3], 52);
+    VERIFY_BITS(c, 48);
+    /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c   += t4;
+    VERIFY_BITS(c, 49);
+    /* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[4] = c;
+    VERIFY_BITS(r[4], 49);
+    /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+}
+
+#endif
diff --git a/secp256k1/field_gmp.h b/secp256k1/field_gmp.h
deleted file mode 100644
index d51dea0af..000000000
--- a/secp256k1/field_gmp.h
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_FIELD_REPR_
-#define _SECP256K1_FIELD_REPR_
-
-#include <gmp.h>
-
-#define FIELD_LIMBS ((256 + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS)
-
-typedef struct {
-    mp_limb_t n[FIELD_LIMBS+1];
-} secp256k1_fe_t;
-
-#endif
diff --git a/secp256k1/field_impl.h b/secp256k1/field_impl.h
new file mode 100644
index 000000000..e6ec11e8f
--- /dev/null
+++ b/secp256k1/field_impl.h
@@ -0,0 +1,263 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_IMPL_H_
+#define _SECP256K1_FIELD_IMPL_H_
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#include "util.h"
+
+#if defined(USE_FIELD_10X26)
+#include "field_10x26_impl.h"
+#elif defined(USE_FIELD_5X52)
+#include "field_5x52_impl.h"
+#else
+#error "Please select field implementation"
+#endif
+
+SECP256K1_INLINE static int secp256k1_fe_equal_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
+    secp256k1_fe_t diff; /* will hold b - a */
+    secp256k1_fe_negate(&diff, a, 1); /* diff = -a; a must have magnitude <= 1 */
+    secp256k1_fe_add(&diff, b); /* diff = b - a */
+    return secp256k1_fe_normalizes_to_zero_var(&diff); /* equal iff the difference reduces to zero */
+}
+
+static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+    secp256k1_fe_t x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
+    int j;
+    /* Sets r = a^((p + 1)/4) and returns whether r^2 equals a. r is written in either case. Below, xN = a^(2^N - 1). */
+    /** The binary representation of (p + 1)/4 has 3 blocks of 1s, with lengths in
+     *  { 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
+     *  1, [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
+     */
+
+    secp256k1_fe_sqr(&x2, a);
+    secp256k1_fe_mul(&x2, &x2, a); /* x2 = a^(2^2 - 1) */
+
+    secp256k1_fe_sqr(&x3, &x2);
+    secp256k1_fe_mul(&x3, &x3, a); /* x3 = a^(2^3 - 1) */
+
+    x6 = x3;
+    for (j=0; j<3; j++) {
+        secp256k1_fe_sqr(&x6, &x6);
+    }
+    secp256k1_fe_mul(&x6, &x6, &x3); /* x6 = x3^(2^3) * x3 */
+
+    x9 = x6;
+    for (j=0; j<3; j++) {
+        secp256k1_fe_sqr(&x9, &x9);
+    }
+    secp256k1_fe_mul(&x9, &x9, &x3); /* x9 = x6^(2^3) * x3 */
+
+    x11 = x9;
+    for (j=0; j<2; j++) {
+        secp256k1_fe_sqr(&x11, &x11);
+    }
+    secp256k1_fe_mul(&x11, &x11, &x2); /* x11 = x9^(2^2) * x2 */
+
+    x22 = x11;
+    for (j=0; j<11; j++) {
+        secp256k1_fe_sqr(&x22, &x22);
+    }
+    secp256k1_fe_mul(&x22, &x22, &x11); /* x22 = x11^(2^11) * x11 */
+
+    x44 = x22;
+    for (j=0; j<22; j++) {
+        secp256k1_fe_sqr(&x44, &x44);
+    }
+    secp256k1_fe_mul(&x44, &x44, &x22); /* x44 = x22^(2^22) * x22 */
+
+    x88 = x44;
+    for (j=0; j<44; j++) {
+        secp256k1_fe_sqr(&x88, &x88);
+    }
+    secp256k1_fe_mul(&x88, &x88, &x44); /* x88 = x44^(2^44) * x44 */
+
+    x176 = x88;
+    for (j=0; j<88; j++) {
+        secp256k1_fe_sqr(&x176, &x176);
+    }
+    secp256k1_fe_mul(&x176, &x176, &x88); /* x176 = x88^(2^88) * x88 */
+
+    x220 = x176;
+    for (j=0; j<44; j++) {
+        secp256k1_fe_sqr(&x220, &x220);
+    }
+    secp256k1_fe_mul(&x220, &x220, &x44); /* x220 = x176^(2^44) * x44 */
+
+    x223 = x220;
+    for (j=0; j<3; j++) {
+        secp256k1_fe_sqr(&x223, &x223);
+    }
+    secp256k1_fe_mul(&x223, &x223, &x3); /* x223 = x220^(2^3) * x3 */
+
+    /* The final result is then assembled using a sliding window over the blocks. */
+
+    t1 = x223;
+    for (j=0; j<23; j++) {
+        secp256k1_fe_sqr(&t1, &t1);
+    }
+    secp256k1_fe_mul(&t1, &t1, &x22); /* 223 ones, one zero, 22 ones */
+    for (j=0; j<6; j++) {
+        secp256k1_fe_sqr(&t1, &t1);
+    }
+    secp256k1_fe_mul(&t1, &t1, &x2); /* ... four zeros, 2 ones */
+    secp256k1_fe_sqr(&t1, &t1);
+    secp256k1_fe_sqr(r, &t1); /* ... two trailing zeros: exponent is now 2^254 - 2^30 - 244 = (p + 1)/4 */
+
+    /* Check that a square root was actually calculated */
+
+    secp256k1_fe_sqr(&t1, r);
+    return secp256k1_fe_equal_var(&t1, a);
+}
+
+static void secp256k1_fe_inv(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+    secp256k1_fe_t x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
+    int j;
+    /* Sets r = a^(p - 2) using a fixed sequence of squarings and multiplications. Below, xN = a^(2^N - 1). */
+    /** The binary representation of (p - 2) has 5 blocks of 1s, with lengths in
+     *  { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
+     *  [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
+     */
+
+    secp256k1_fe_sqr(&x2, a);
+    secp256k1_fe_mul(&x2, &x2, a); /* x2 = a^(2^2 - 1) */
+
+    secp256k1_fe_sqr(&x3, &x2);
+    secp256k1_fe_mul(&x3, &x3, a); /* x3 = a^(2^3 - 1) */
+
+    x6 = x3;
+    for (j=0; j<3; j++) {
+        secp256k1_fe_sqr(&x6, &x6);
+    }
+    secp256k1_fe_mul(&x6, &x6, &x3); /* x6 = x3^(2^3) * x3 */
+
+    x9 = x6;
+    for (j=0; j<3; j++) {
+        secp256k1_fe_sqr(&x9, &x9);
+    }
+    secp256k1_fe_mul(&x9, &x9, &x3); /* x9 = x6^(2^3) * x3 */
+
+    x11 = x9;
+    for (j=0; j<2; j++) {
+        secp256k1_fe_sqr(&x11, &x11);
+    }
+    secp256k1_fe_mul(&x11, &x11, &x2); /* x11 = x9^(2^2) * x2 */
+
+    x22 = x11;
+    for (j=0; j<11; j++) {
+        secp256k1_fe_sqr(&x22, &x22);
+    }
+    secp256k1_fe_mul(&x22, &x22, &x11); /* x22 = x11^(2^11) * x11 */
+
+    x44 = x22;
+    for (j=0; j<22; j++) {
+        secp256k1_fe_sqr(&x44, &x44);
+    }
+    secp256k1_fe_mul(&x44, &x44, &x22); /* x44 = x22^(2^22) * x22 */
+
+    x88 = x44;
+    for (j=0; j<44; j++) {
+        secp256k1_fe_sqr(&x88, &x88);
+    }
+    secp256k1_fe_mul(&x88, &x88, &x44); /* x88 = x44^(2^44) * x44 */
+
+    x176 = x88;
+    for (j=0; j<88; j++) {
+        secp256k1_fe_sqr(&x176, &x176);
+    }
+    secp256k1_fe_mul(&x176, &x176, &x88); /* x176 = x88^(2^88) * x88 */
+
+    x220 = x176;
+    for (j=0; j<44; j++) {
+        secp256k1_fe_sqr(&x220, &x220);
+    }
+    secp256k1_fe_mul(&x220, &x220, &x44); /* x220 = x176^(2^44) * x44 */
+
+    x223 = x220;
+    for (j=0; j<3; j++) {
+        secp256k1_fe_sqr(&x223, &x223);
+    }
+    secp256k1_fe_mul(&x223, &x223, &x3); /* x223 = x220^(2^3) * x3 */
+
+    /* The final result is then assembled using a sliding window over the blocks. */
+
+    t1 = x223;
+    for (j=0; j<23; j++) {
+        secp256k1_fe_sqr(&t1, &t1);
+    }
+    secp256k1_fe_mul(&t1, &t1, &x22); /* 223 ones, one zero, 22 ones */
+    for (j=0; j<5; j++) {
+        secp256k1_fe_sqr(&t1, &t1);
+    }
+    secp256k1_fe_mul(&t1, &t1, a); /* ... four zeros, 1 one */
+    for (j=0; j<3; j++) {
+        secp256k1_fe_sqr(&t1, &t1);
+    }
+    secp256k1_fe_mul(&t1, &t1, &x2); /* ... one zero, 2 ones */
+    for (j=0; j<2; j++) {
+        secp256k1_fe_sqr(&t1, &t1);
+    }
+    secp256k1_fe_mul(r, a, &t1); /* ... one zero, 1 one: exponent is now 2^256 - 2^32 - 979 = p - 2 */
+}
+
+static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) { /* Sets r to the inverse of a; the method is chosen at build time. */
+#if defined(USE_FIELD_INV_BUILTIN)
+    secp256k1_fe_inv(r, a); /* reuse the exponentiation-based inverse */
+#elif defined(USE_FIELD_INV_NUM)
+    secp256k1_num_t n, m;
+    /* secp256k1 field prime, value p defined in "Standards for Efficient Cryptography" (SEC2) 2.7.1. */
+    static const unsigned char prime[32] = {
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+        0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F
+    };
+    unsigned char b[32];
+    secp256k1_fe_t c = *a; /* work on a copy so a itself is not modified by the normalization */
+    secp256k1_fe_normalize_var(&c);
+    secp256k1_fe_get_b32(b, &c); /* serialize to big-endian bytes for the num_* routines */
+    secp256k1_num_set_bin(&n, b, 32);
+    secp256k1_num_set_bin(&m, prime, 32);
+    secp256k1_num_mod_inverse(&n, &n, &m);
+    secp256k1_num_get_bin(b, 32, &n);
+    VERIFY_CHECK(secp256k1_fe_set_b32(r, b)); /* NOTE(review): the store into r happens inside the macro argument; this relies on VERIFY_CHECK evaluating its argument even when VERIFY is undefined - confirm in util.h */
+#else
+#error "Please select field inverse implementation"
+#endif
+}
+
+static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+    secp256k1_fe_t u;
+    size_t i;
+    if (len < 1) {
+        return;
+    }
+    /* Inverts a[0..len-1] into r[0..len-1] with one field inversion and 3*(len-1) multiplications; r and a must not overlap. */
+    VERIFY_CHECK((r + len <= a) || (a + len <= r));
+
+    r[0] = a[0];
+    /* Forward pass: r[i] = a[0] * a[1] * ... * a[i] */
+    i = 0;
+    while (++i < len) {
+        secp256k1_fe_mul(&r[i], &r[i - 1], &a[i]);
+    }
+    /* u = inverse of the full product (i is len - 1 after the decrement) */
+    secp256k1_fe_inv_var(&u, &r[--i]);
+    /* Backward pass: r[j] = (product up to j-1) * u = 1/a[j], then strip a[j] from u */
+    while (i > 0) {
+        size_t j = i--;
+        secp256k1_fe_mul(&r[j], &r[i], &u);
+        secp256k1_fe_mul(&u, &u, &a[j]);
+    }
+
+    r[0] = u;
+}
+
+#endif
diff --git a/secp256k1/group.h b/secp256k1/group.h
index fc02a4249..0b08b3b99 100644
--- a/secp256k1/group.h
+++ b/secp256k1/group.h
@@ -1,6 +1,8 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
 
 #ifndef _SECP256K1_GROUP_
 #define _SECP256K1_GROUP_
@@ -12,99 +14,108 @@
 typedef struct {
     secp256k1_fe_t x;
     secp256k1_fe_t y;
-    int infinity; // whether this represents the point at infinity
+    int infinity; /* whether this represents the point at infinity */
 } secp256k1_ge_t;
 
+#define SECP256K1_GE_CONST(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) {SECP256K1_FE_CONST((a),(b),(c),(d),(e),(f),(g),(h)), SECP256K1_FE_CONST((i),(j),(k),(l),(m),(n),(o),(p)), 0}
+#define SECP256K1_GE_CONST_INFINITY {SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), 1}
+
 /** A group element of the secp256k1 curve, in jacobian coordinates. */
 typedef struct {
-    secp256k1_fe_t x; // actual X: x/z^2
-    secp256k1_fe_t y; // actual Y: y/z^3
+    secp256k1_fe_t x; /* actual X: x/z^2 */
+    secp256k1_fe_t y; /* actual Y: y/z^3 */
     secp256k1_fe_t z;
-    int infinity; // whether this represents the point at infinity
+    int infinity; /* whether this represents the point at infinity */
 } secp256k1_gej_t;
 
-/** Global constants related to the group */
-typedef struct {
-    secp256k1_num_t order; // the order of the curve (= order of its generator)
-    secp256k1_num_t half_order; // half the order of the curve (= order of its generator)
-    secp256k1_ge_t g; // the generator point
-
-    // constants related to secp256k1's efficiently computable endomorphism
-    secp256k1_fe_t beta;
-    secp256k1_num_t lambda, a1b2, b1, a2;
-} secp256k1_ge_consts_t;
-
-static const secp256k1_ge_consts_t *secp256k1_ge_consts = NULL;
+#define SECP256K1_GEJ_CONST(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) {SECP256K1_FE_CONST((a),(b),(c),(d),(e),(f),(g),(h)), SECP256K1_FE_CONST((i),(j),(k),(l),(m),(n),(o),(p)), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1), 0}
+#define SECP256K1_GEJ_CONST_INFINITY {SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), 1}
 
-/** Initialize the group module. */
-void static secp256k1_ge_start(void);
+typedef struct {
+    secp256k1_fe_storage_t x;
+    secp256k1_fe_storage_t y;
+} secp256k1_ge_storage_t;
 
-/** De-initialize the group module. */
-void static secp256k1_ge_stop(void);
+#define SECP256K1_GE_STORAGE_CONST(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) {SECP256K1_FE_STORAGE_CONST((a),(b),(c),(d),(e),(f),(g),(h)), SECP256K1_FE_STORAGE_CONST((i),(j),(k),(l),(m),(n),(o),(p))}
 
 /** Set a group element equal to the point at infinity */
-void static secp256k1_ge_set_infinity(secp256k1_ge_t *r);
+static void secp256k1_ge_set_infinity(secp256k1_ge_t *r);
 
 /** Set a group element equal to the point with given X and Y coordinates */
-void static secp256k1_ge_set_xy(secp256k1_ge_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y);
+static void secp256k1_ge_set_xy(secp256k1_ge_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y);
 
-/** Set a group element (jacobian) equal to the point with given X coordinate, and given oddness for Y.
-    The result is not guaranteed to be valid. */
-void static secp256k1_ge_set_xo(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd);
+/** Set a group element (affine) equal to the point with the given X coordinate, and given oddness
+ *  for Y. Return value indicates whether the result is valid. */
+static int secp256k1_ge_set_xo_var(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd);
 
 /** Check whether a group element is the point at infinity. */
-int  static secp256k1_ge_is_infinity(const secp256k1_ge_t *a);
+static int secp256k1_ge_is_infinity(const secp256k1_ge_t *a);
 
 /** Check whether a group element is valid (i.e., on the curve). */
-int  static secp256k1_ge_is_valid(const secp256k1_ge_t *a);
-
-void static secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a);
+static int secp256k1_ge_is_valid_var(const secp256k1_ge_t *a);
 
-/** Get a hex representation of a point. *rlen will be overwritten with the real length. */
-void static secp256k1_ge_get_hex(char *r, int *rlen, const secp256k1_ge_t *a);
+static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a);
 
 /** Set a group element equal to another which is given in jacobian coordinates */
-void static secp256k1_ge_set_gej(secp256k1_ge_t *r, secp256k1_gej_t *a);
+static void secp256k1_ge_set_gej(secp256k1_ge_t *r, secp256k1_gej_t *a);
+
+/** Set a batch of group elements equal to the inputs given in jacobian coordinates */
+static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t *r, const secp256k1_gej_t *a);
 
 
 /** Set a group element (jacobian) equal to the point at infinity. */
-void static secp256k1_gej_set_infinity(secp256k1_gej_t *r);
+static void secp256k1_gej_set_infinity(secp256k1_gej_t *r);
 
 /** Set a group element (jacobian) equal to the point with given X and Y coordinates. */
-void static secp256k1_gej_set_xy(secp256k1_gej_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y);
+static void secp256k1_gej_set_xy(secp256k1_gej_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y);
 
 /** Set a group element (jacobian) equal to another which is given in affine coordinates. */
-void static secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a);
+static void secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a);
 
-/** Get the X coordinate of a group element (jacobian). */
-void static secp256k1_gej_get_x(secp256k1_fe_t *r, const secp256k1_gej_t *a);
+/** Compare the X coordinate of a group element (jacobian). */
+static int secp256k1_gej_eq_x_var(const secp256k1_fe_t *x, const secp256k1_gej_t *a);
 
 /** Set r equal to the inverse of a (i.e., mirrored around the X axis) */
-void static secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a);
+static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a);
 
 /** Check whether a group element is the point at infinity. */
-int  static secp256k1_gej_is_infinity(const secp256k1_gej_t *a);
+static int secp256k1_gej_is_infinity(const secp256k1_gej_t *a);
 
 /** Set r equal to the double of a. */
-void static secp256k1_gej_double(secp256k1_gej_t *r, const secp256k1_gej_t *a);
+static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *a);
 
 /** Set r equal to the sum of a and b. */
-void static secp256k1_gej_add(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b);
+static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b);
 
-/** Set r equal to the sum of a and b (with b given in jacobian coordinates). This is more efficient
-    than secp256k1_gej_add. */
-void static secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b);
+/** Set r equal to the sum of a and b (with b given in affine coordinates, and not infinity). */
+static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b);
 
-/** Get a hex representation of a point. *rlen will be overwritten with the real length. */
-void static secp256k1_gej_get_hex(char *r, int *rlen, const secp256k1_gej_t *a);
+/** Set r equal to the sum of a and b (with b given in affine coordinates). This is more efficient
+    than secp256k1_gej_add_var. It is identical to secp256k1_gej_add_ge but without constant-time
+    guarantee, and b is allowed to be infinity. */
+static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b);
 
 #ifdef USE_ENDOMORPHISM
 /** Set r to be equal to lambda times a, where lambda is chosen in a way such that this is very fast. */
-void static secp256k1_gej_mul_lambda(secp256k1_gej_t *r, const secp256k1_gej_t *a);
-
-/** Find r1 and r2 such that r1+r2*lambda = a, and r1 and r2 are maximum 128 bits long (given that a is
-    not more than 256 bits). */
-void static secp256k1_gej_split_exp(secp256k1_num_t *r1, secp256k1_num_t *r2, const secp256k1_num_t *a);
+static void secp256k1_gej_mul_lambda(secp256k1_gej_t *r, const secp256k1_gej_t *a);
 #endif
 
+/** Clear a secp256k1_gej_t to prevent leaking sensitive information. */
+static void secp256k1_gej_clear(secp256k1_gej_t *r);
+
+/** Clear a secp256k1_ge_t to prevent leaking sensitive information. */
+static void secp256k1_ge_clear(secp256k1_ge_t *r);
+
+/** Convert a group element to the storage type. */
+static void secp256k1_ge_to_storage(secp256k1_ge_storage_t *r, const secp256k1_ge_t*);
+
+/** Convert a group element back from the storage type. */
+static void secp256k1_ge_from_storage(secp256k1_ge_t *r, const secp256k1_ge_storage_t*);
+
+/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */
+static void secp256k1_ge_storage_cmov(secp256k1_ge_storage_t *r, const secp256k1_ge_storage_t *a, int flag);
+
+/** Rescale a jacobian point by b which must be non-zero. Constant-time. */
+static void secp256k1_gej_rescale(secp256k1_gej_t *r, const secp256k1_fe_t *b);
+
 #endif
diff --git a/secp256k1/group_impl.h b/secp256k1/group_impl.h
new file mode 100644
index 000000000..0f64576fb
--- /dev/null
+++ b/secp256k1/group_impl.h
@@ -0,0 +1,443 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_GROUP_IMPL_H_
+#define _SECP256K1_GROUP_IMPL_H_
+
+#include <string.h>
+
+#include "num.h"
+#include "field.h"
+#include "group.h"
+
+/** Generator for secp256k1, value 'g' defined in
+ *  "Standards for Efficient Cryptography" (SEC2) 2.7.1.
+ *  Finite affine point: the first eight words are x, the last eight are y (see SECP256K1_GE_CONST). */
+static const secp256k1_ge_t secp256k1_ge_const_g = SECP256K1_GE_CONST(
+    0x79BE667EUL, 0xF9DCBBACUL, 0x55A06295UL, 0xCE870B07UL,
+    0x029BFCDBUL, 0x2DCE28D9UL, 0x59F2815BUL, 0x16F81798UL,
+    0x483ADA77UL, 0x26A3C465UL, 0x5DA4FBFCUL, 0x0E1108A8UL,
+    0xFD17B448UL, 0xA6855419UL, 0x9C47D08FUL, 0xFB10D4B8UL
+);
+
+static void secp256k1_ge_set_infinity(secp256k1_ge_t *r) { /* mark r as the point at infinity */
+    r->infinity = 1; /* only the flag changes; r->x and r->y are left as they were */
+}
+
+static void secp256k1_ge_set_xy(secp256k1_ge_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y) { /* r = finite affine point (*x, *y) */
+    r->infinity = 0; /* the result is never the point at infinity */
+    r->x = *x; /* coordinates are copied as given; no on-curve check is made here */
+    r->y = *y;
+}
+
+static int secp256k1_ge_is_infinity(const secp256k1_ge_t *a) { /* returns a's infinity flag */
+    return a->infinity; /* nonzero means a represents the point at infinity */
+}
+
+static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a) { /* r = -a: same x and infinity flag, y negated */
+    *r = *a; /* copy everything first; after this only r is touched */
+    secp256k1_fe_normalize_weak(&r->y); /* done before negating; presumably so the bound 1 passed below holds - see field.h */
+    secp256k1_fe_negate(&r->y, &r->y, 1);
+}
+
+static void secp256k1_ge_set_gej(secp256k1_ge_t *r, secp256k1_gej_t *a) { /* r = affine form of a; a itself is rewritten in place with z = 1 */
+    secp256k1_fe_t z2, z3;
+    r->infinity = a->infinity; /* no early return for infinity here, unlike secp256k1_ge_set_gej_var */
+    secp256k1_fe_inv(&a->z, &a->z);       /* a->z = 1/z */
+    secp256k1_fe_sqr(&z2, &a->z);         /* z2 = 1/z^2 */
+    secp256k1_fe_mul(&z3, &a->z, &z2);    /* z3 = 1/z^3 */
+    secp256k1_fe_mul(&a->x, &a->x, &z2);  /* affine x = X/z^2 */
+    secp256k1_fe_mul(&a->y, &a->y, &z3);  /* affine y = Y/z^3 */
+    secp256k1_fe_set_int(&a->z, 1);       /* a now carries the rescaled coordinates with z = 1 */
+    r->x = a->x;
+    r->y = a->y;
+}
+
+static void secp256k1_ge_set_gej_var(secp256k1_ge_t *r, secp256k1_gej_t *a) { /* like secp256k1_ge_set_gej, but returns early for infinity and uses secp256k1_fe_inv_var */
+    secp256k1_fe_t z2, z3;
+    r->infinity = a->infinity;
+    if (a->infinity) {
+        return; /* infinity: only the flag is copied; r->x, r->y and a are left untouched */
+    }
+    secp256k1_fe_inv_var(&a->z, &a->z);   /* a->z = 1/z */
+    secp256k1_fe_sqr(&z2, &a->z);         /* z2 = 1/z^2 */
+    secp256k1_fe_mul(&z3, &a->z, &z2);    /* z3 = 1/z^3 */
+    secp256k1_fe_mul(&a->x, &a->x, &z2);  /* affine x = X/z^2 */
+    secp256k1_fe_mul(&a->y, &a->y, &z3);  /* affine y = Y/z^3 */
+    secp256k1_fe_set_int(&a->z, 1);       /* a now carries the rescaled coordinates with z = 1 */
+    r->x = a->x;
+    r->y = a->y;
+}
+
+static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t *r, const secp256k1_gej_t *a) { /* r[i] = affine form of a[i] for i < len, sharing one batched inversion */
+    secp256k1_fe_t *az;  /* z coordinates of the non-infinity inputs, packed together */
+    secp256k1_fe_t *azi; /* their inverses, in the same packed order */
+    size_t i;
+    size_t count = 0;    /* number of non-infinity inputs seen so far */
+    az = (secp256k1_fe_t *)checked_malloc(sizeof(secp256k1_fe_t) * len);
+    for (i = 0; i < len; i++) {
+        if (!a[i].infinity) {
+            az[count++] = a[i].z;
+        }
+    }
+    /* NOTE(review): both allocations can be zero-byte requests (len == 0, or every input at infinity for azi); confirm checked_malloc accepts that. */
+    azi = (secp256k1_fe_t *)checked_malloc(sizeof(secp256k1_fe_t) * count);
+    secp256k1_fe_inv_all_var(count, azi, az);
+    free(az);
+    /* Second pass: walk the inputs again, consuming the packed inverses in the order they were collected. */
+    count = 0;
+    for (i = 0; i < len; i++) {
+        r[i].infinity = a[i].infinity; /* r[i].x and r[i].y are not written for infinity entries */
+        if (!a[i].infinity) {
+            secp256k1_fe_t zi2, zi3;
+            secp256k1_fe_t *zi = &azi[count++];
+            secp256k1_fe_sqr(&zi2, zi);                /* zi2 = 1/z^2 */
+            secp256k1_fe_mul(&zi3, &zi2, zi);          /* zi3 = 1/z^3 */
+            secp256k1_fe_mul(&r[i].x, &a[i].x, &zi2);  /* x = X/z^2 */
+            secp256k1_fe_mul(&r[i].y, &a[i].y, &zi3);  /* y = Y/z^3 */
+        }
+    }
+    free(azi);
+}
+
+static void secp256k1_gej_set_infinity(secp256k1_gej_t *r) { /* r = point at infinity, with every coordinate zeroed */
+    secp256k1_fe_set_int(&r->z, 0);
+    secp256k1_fe_set_int(&r->y, 0);
+    secp256k1_fe_set_int(&r->x, 0);
+    r->infinity = 1; /* flag written last; the four writes are independent */
+}
+
+static void secp256k1_gej_set_xy(secp256k1_gej_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y) { /* r = jacobian form of the finite affine point (*x, *y) */
+    r->infinity = 0;
+    r->x = *x; /* copied as given; no on-curve check is made here */
+    r->y = *y;
+    secp256k1_fe_set_int(&r->z, 1); /* z = 1, so the jacobian x and y equal the affine ones */
+}
+
+static void secp256k1_gej_clear(secp256k1_gej_t *r) { /* wipe r: clear all three coordinates and reset the infinity flag */
+    secp256k1_fe_clear(&r->z);
+    secp256k1_fe_clear(&r->y);
+    secp256k1_fe_clear(&r->x);
+    r->infinity = 0; /* note: a cleared element is flagged as finite, not as infinity */
+}
+
+static void secp256k1_ge_clear(secp256k1_ge_t *r) { /* wipe r: clear both coordinates and reset the infinity flag */
+    secp256k1_fe_clear(&r->y);
+    secp256k1_fe_clear(&r->x);
+    r->infinity = 0; /* note: a cleared element is flagged as finite, not as infinity */
+}
+
+static int secp256k1_ge_set_xo_var(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd) { /* r = point with X = *x and the requested Y parity; returns 1 on success, 0 if secp256k1_fe_sqrt_var fails on x^3 + 7 */
+    secp256k1_fe_t x2, x3, c;
+    r->x = *x;
+    secp256k1_fe_sqr(&x2, x);
+    secp256k1_fe_mul(&x3, x, &x2); /* x3 = x^3 */
+    r->infinity = 0;
+    secp256k1_fe_set_int(&c, 7);
+    secp256k1_fe_add(&c, &x3); /* c = x^3 + 7, the right-hand side of the curve equation */
+    if (!secp256k1_fe_sqrt_var(&r->y, &c)) {
+        return 0; /* r->x and r->infinity were already written; the caller must not treat r as a valid point */
+    }
+    secp256k1_fe_normalize_var(&r->y); /* normalized before the parity test */
+    if (secp256k1_fe_is_odd(&r->y) != odd) { /* NOTE(review): compared by value, so odd is presumably expected to be exactly 0 or 1 - confirm callers */
+        secp256k1_fe_negate(&r->y, &r->y, 1); /* switch to the other square root */
+    }
+    return 1;
+}
+
+static void secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a) { /* r = jacobian form of the affine element a */
+   r->infinity = a->infinity; /* the infinity flag is carried over unchanged */
+   r->x = a->x;
+   r->y = a->y;
+   secp256k1_fe_set_int(&r->z, 1); /* z = 1 even when a is infinity */
+}
+
+static int secp256k1_gej_eq_x_var(const secp256k1_fe_t *x, const secp256k1_gej_t *a) { /* returns whether the affine X of a equals *x, without inverting a->z */
+    secp256k1_fe_t r, r2;
+    VERIFY_CHECK(!a->infinity); /* a must be a finite point */
+    secp256k1_fe_sqr(&r, &a->z); secp256k1_fe_mul(&r, &r, x); /* r = x * z^2 */
+    r2 = a->x; secp256k1_fe_normalize_weak(&r2); /* r2 = weakly normalized copy of the jacobian X */
+    return secp256k1_fe_equal_var(&r, &r2); /* X/z^2 == x is tested as X == x*z^2 */
+}
+
+static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a) {
+    /* r = -a: mirror the point around the X axis by negating y only. */
+    secp256k1_fe_t *ry = &r->y;
+    /* A whole-struct copy carries over infinity, x, y and z in one go. */
+    *r = *a;
+    secp256k1_fe_normalize_weak(ry);
+    secp256k1_fe_negate(ry, ry, 1);
+}
+
+static int secp256k1_gej_is_infinity(const secp256k1_gej_t *a) { /* returns a's infinity flag */
+    return a->infinity; /* nonzero means a represents the point at infinity */
+}
+
+static int secp256k1_gej_is_valid_var(const secp256k1_gej_t *a) { /* returns whether the jacobian point a satisfies the curve equation */
+    secp256k1_fe_t y2, x3, z2, z6;
+    if (a->infinity) {
+        return 0; /* the point at infinity is reported as not valid */
+    }
+    /** y^2 = x^3 + 7
+     *  (Y/Z^3)^2 = (X/Z^2)^3 + 7
+     *  Y^2 / Z^6 = X^3 / Z^6 + 7
+     *  Y^2 = X^3 + 7*Z^6
+     */
+    secp256k1_fe_sqr(&y2, &a->y);                                    /* y2 = Y^2 */
+    secp256k1_fe_sqr(&x3, &a->x); secp256k1_fe_mul(&x3, &x3, &a->x); /* x3 = X^3 */
+    secp256k1_fe_sqr(&z2, &a->z);                                    /* z2 = Z^2 */
+    secp256k1_fe_sqr(&z6, &z2); secp256k1_fe_mul(&z6, &z6, &z2);     /* z6 = Z^6 */
+    secp256k1_fe_mul_int(&z6, 7);                                    /* z6 = 7*Z^6 */
+    secp256k1_fe_add(&x3, &z6);                                      /* x3 = X^3 + 7*Z^6 */
+    secp256k1_fe_normalize_weak(&x3);
+    return secp256k1_fe_equal_var(&y2, &x3);
+}
+
+static int secp256k1_ge_is_valid_var(const secp256k1_ge_t *a) { /* returns whether the affine point a satisfies the curve equation */
+    secp256k1_fe_t y2, x3, c;
+    if (a->infinity) {
+        return 0; /* the point at infinity is reported as not valid */
+    }
+    /* y^2 = x^3 + 7 */
+    secp256k1_fe_sqr(&y2, &a->y);                                    /* y2 = y^2 */
+    secp256k1_fe_sqr(&x3, &a->x); secp256k1_fe_mul(&x3, &x3, &a->x); /* x3 = x^3 */
+    secp256k1_fe_set_int(&c, 7);
+    secp256k1_fe_add(&x3, &c);                                       /* x3 = x^3 + 7 */
+    secp256k1_fe_normalize_weak(&x3);
+    return secp256k1_fe_equal_var(&y2, &x3);
+}
+
+static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *a) { /* r = 2*a in jacobian coordinates */
+    /* Operations: 3 mul, 4 sqr, 0 normalize, 12 mul_int/add/negate. The parenthesized numbers below are presumably field magnitudes after each step - see field.h. */
+    secp256k1_fe_t t1,t2,t3,t4;
+    /** For secp256k1, 2Q is infinity if and only if Q is infinity. This is because if 2Q = infinity,
+     *  Q must equal -Q, or that Q.y == -(Q.y), or Q.y is 0. For a point on y^2 = x^3 + 7 to have
+     *  y=0, x^3 must be -7 mod p. However, -7 has no cube root mod p.
+     */
+    r->infinity = a->infinity;
+    if (r->infinity) {
+        return; /* doubling infinity: only the flag is set, the coordinates of r are not written */
+    }
+    /* Each output coordinate is written only after the last read of the matching input coordinate, so r may be the same object as a. */
+    secp256k1_fe_mul(&r->z, &a->z, &a->y);
+    secp256k1_fe_mul_int(&r->z, 2);       /* Z' = 2*Y*Z (2) */
+    secp256k1_fe_sqr(&t1, &a->x);
+    secp256k1_fe_mul_int(&t1, 3);         /* T1 = 3*X^2 (3) */
+    secp256k1_fe_sqr(&t2, &t1);           /* T2 = 9*X^4 (1) */
+    secp256k1_fe_sqr(&t3, &a->y);
+    secp256k1_fe_mul_int(&t3, 2);         /* T3 = 2*Y^2 (2) */
+    secp256k1_fe_sqr(&t4, &t3);
+    secp256k1_fe_mul_int(&t4, 2);         /* T4 = 8*Y^4 (2) */
+    secp256k1_fe_mul(&t3, &t3, &a->x);    /* T3 = 2*X*Y^2 (1) */
+    r->x = t3;
+    secp256k1_fe_mul_int(&r->x, 4);       /* X' = 8*X*Y^2 (4) */
+    secp256k1_fe_negate(&r->x, &r->x, 4); /* X' = -8*X*Y^2 (5) */
+    secp256k1_fe_add(&r->x, &t2);         /* X' = 9*X^4 - 8*X*Y^2 (6) */
+    secp256k1_fe_negate(&t2, &t2, 1);     /* T2 = -9*X^4 (2) */
+    secp256k1_fe_mul_int(&t3, 6);         /* T3 = 12*X*Y^2 (6) */
+    secp256k1_fe_add(&t3, &t2);           /* T3 = 12*X*Y^2 - 9*X^4 (8) */
+    secp256k1_fe_mul(&r->y, &t1, &t3);    /* Y' = 36*X^3*Y^2 - 27*X^6 (1) */
+    secp256k1_fe_negate(&t2, &t4, 2);     /* T2 = -8*Y^4 (3) */
+    secp256k1_fe_add(&r->y, &t2);         /* Y' = 36*X^3*Y^2 - 27*X^6 - 8*Y^4 (4) */
+}
+
+static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b) { /* r = a + b, both operands in jacobian coordinates */
+    /* Operations: 12 mul, 4 sqr, 2 normalize, 12 mul_int/add/negate */
+    secp256k1_fe_t z22, z12, u1, u2, s1, s2, h, i, i2, h2, h3, t;
+    if (a->infinity) {
+        *r = *b; /* infinity + b = b */
+        return;
+    }
+    if (b->infinity) {
+        *r = *a; /* a + infinity = a */
+        return;
+    }
+    r->infinity = 0;
+    secp256k1_fe_sqr(&z22, &b->z);        /* z22 = Z2^2 */
+    secp256k1_fe_sqr(&z12, &a->z);        /* z12 = Z1^2 */
+    secp256k1_fe_mul(&u1, &a->x, &z22);   /* U1 = X1*Z2^2 */
+    secp256k1_fe_mul(&u2, &b->x, &z12);   /* U2 = X2*Z1^2 */
+    secp256k1_fe_mul(&s1, &a->y, &z22); secp256k1_fe_mul(&s1, &s1, &b->z); /* S1 = Y1*Z2^3 */
+    secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); /* S2 = Y2*Z1^3 */
+    secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); /* H = U2 - U1 */
+    secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); /* I = S2 - S1 */
+    if (secp256k1_fe_normalizes_to_zero_var(&h)) { /* H == 0: both operands have the same affine x */
+        if (secp256k1_fe_normalizes_to_zero_var(&i)) { /* ...and the same y: the operands are equal, so double */
+            secp256k1_gej_double_var(r, a);
+        } else {
+            r->infinity = 1; /* same x but different y: the sum is infinity; r's coordinates are not written */
+        }
+        return;
+    }
+    secp256k1_fe_sqr(&i2, &i);            /* I^2 */
+    secp256k1_fe_sqr(&h2, &h);            /* H^2 */
+    secp256k1_fe_mul(&h3, &h, &h2);       /* H^3 */
+    secp256k1_fe_mul(&r->z, &a->z, &b->z); secp256k1_fe_mul(&r->z, &r->z, &h); /* Z3 = Z1*Z2*H */
+    secp256k1_fe_mul(&t, &u1, &h2);       /* T = U1*H^2 */
+    r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2); /* X3 = I^2 - H^3 - 2*T */
+    secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i); /* Y3 = I*(T - X3) ... */
+    secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1); /* h3 = -S1*H^3 */
+    secp256k1_fe_add(&r->y, &h3);         /* ... Y3 = I*(T - X3) - S1*H^3 */
+}
+
+static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b) { /* r = a + b with b affine; either operand may be infinity */
+    /* Operations: 8 mul, 3 sqr, 4 normalize, 12 mul_int/add/negate */
+    secp256k1_fe_t z12, u1, u2, s1, s2, h, i, i2, h2, h3, t;
+    if (a->infinity) {
+        r->infinity = b->infinity; /* infinity + b = b, lifted to jacobian form with z = 1 */
+        r->x = b->x;
+        r->y = b->y;
+        secp256k1_fe_set_int(&r->z, 1);
+        return;
+    }
+    if (b->infinity) {
+        *r = *a; /* a + infinity = a */
+        return;
+    }
+    r->infinity = 0;
+    secp256k1_fe_sqr(&z12, &a->z);                  /* z12 = Z1^2 */
+    u1 = a->x; secp256k1_fe_normalize_weak(&u1);    /* U1 = X1 (b is affine, so Z2 = 1) */
+    secp256k1_fe_mul(&u2, &b->x, &z12);             /* U2 = X2*Z1^2 */
+    s1 = a->y; secp256k1_fe_normalize_weak(&s1);    /* S1 = Y1 (Z2 = 1) */
+    secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); /* S2 = Y2*Z1^3 */
+    secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); /* H = U2 - U1 */
+    secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); /* I = S2 - S1 */
+    if (secp256k1_fe_normalizes_to_zero_var(&h)) { /* H == 0: both operands have the same affine x */
+        if (secp256k1_fe_normalizes_to_zero_var(&i)) { /* ...and the same y: the operands are equal, so double */
+            secp256k1_gej_double_var(r, a);
+        } else {
+            r->infinity = 1; /* same x but different y: the sum is infinity; r's coordinates are not written */
+        }
+        return;
+    }
+    secp256k1_fe_sqr(&i2, &i);                      /* I^2 */
+    secp256k1_fe_sqr(&h2, &h);                      /* H^2 */
+    secp256k1_fe_mul(&h3, &h, &h2);                 /* H^3 */
+    r->z = a->z; secp256k1_fe_mul(&r->z, &r->z, &h); /* Z3 = Z1*H */
+    secp256k1_fe_mul(&t, &u1, &h2);                 /* T = U1*H^2 */
+    r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2); /* X3 = I^2 - H^3 - 2*T */
+    secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i); /* Y3 = I*(T - X3) ... */
+    secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1); /* h3 = -S1*H^3 */
+    secp256k1_fe_add(&r->y, &h3);                   /* ... Y3 = I*(T - X3) - S1*H^3 */
+}
+
+static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b) { /* r = a + b with b affine and finite; written without branches on a or b */
+    /* Operations: 7 mul, 5 sqr, 5 normalize, 17 mul_int/add/negate/cmov */
+    static const secp256k1_fe_t fe_1 = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1);
+    secp256k1_fe_t zz, u1, u2, s1, s2, z, t, m, n, q, rr;
+    int infinity; /* infinity flag of the result, stored into r only at the very end */
+    VERIFY_CHECK(!b->infinity); /* b must be a finite point */
+    VERIFY_CHECK(a->infinity == 0 || a->infinity == 1); /* the flag is used as an arithmetic 0/1 factor below */
+
+    /** In:
+     *    Eric Brier and Marc Joye, Weierstrass Elliptic Curves and Side-Channel Attacks.
+     *    In D. Naccache and P. Paillier, Eds., Public Key Cryptography, vol. 2274 of Lecture Notes in Computer Science, pages 335-345. Springer-Verlag, 2002.
+     *  we find as solution for a unified addition/doubling formula:
+     *    lambda = ((x1 + x2)^2 - x1 * x2 + a) / (y1 + y2), with a = 0 for secp256k1's curve equation.
+     *    x3 = lambda^2 - (x1 + x2)
+     *    2*y3 = lambda * (x1 + x2 - 2 * x3) - (y1 + y2).
+     *
+     *  Substituting xi = Xi / Zi^2 and yi = Yi / Zi^3, for i=1,2,3, gives:
+     *    U1 = X1*Z2^2, U2 = X2*Z1^2
+     *    S1 = Y1*Z2^3, S2 = Y2*Z1^3
+     *    Z = Z1*Z2
+     *    T = U1+U2
+     *    M = S1+S2
+     *    Q = T*M^2
+     *    R = T^2-U1*U2
+     *    X3 = 4*(R^2-Q)
+     *    Y3 = 4*(R*(3*Q-2*R^2)-M^4)
+     *    Z3 = 2*M*Z
+     *  (Note that the paper uses xi = Xi / Zi and yi = Yi / Zi instead.)
+     */
+
+    secp256k1_fe_sqr(&zz, &a->z);                       /* zz = Z1^2 */
+    u1 = a->x; secp256k1_fe_normalize_weak(&u1);        /* u1 = U1 = X1*Z2^2 (1) */
+    secp256k1_fe_mul(&u2, &b->x, &zz);                  /* u2 = U2 = X2*Z1^2 (1) */
+    s1 = a->y; secp256k1_fe_normalize_weak(&s1);        /* s1 = S1 = Y1*Z2^3 (1) */
+    secp256k1_fe_mul(&s2, &b->y, &zz);                  /* s2 = Y2*Z1^2 (1) */
+    secp256k1_fe_mul(&s2, &s2, &a->z);                  /* s2 = S2 = Y2*Z1^3 (1) */
+    z = a->z;                                           /* z = Z = Z1*Z2 (8) */
+    t = u1; secp256k1_fe_add(&t, &u2);                  /* t = T = U1+U2 (2) */
+    m = s1; secp256k1_fe_add(&m, &s2);                  /* m = M = S1+S2 (2) */
+    secp256k1_fe_sqr(&n, &m);                           /* n = M^2 (1) */
+    secp256k1_fe_mul(&q, &n, &t);                       /* q = Q = T*M^2 (1) */
+    secp256k1_fe_sqr(&n, &n);                           /* n = M^4 (1) */
+    secp256k1_fe_sqr(&rr, &t);                          /* rr = T^2 (1) */
+    secp256k1_fe_mul(&t, &u1, &u2); secp256k1_fe_negate(&t, &t, 1); /* t = -U1*U2 (2) */
+    secp256k1_fe_add(&rr, &t);                                      /* rr = R = T^2-U1*U2 (3) */
+    secp256k1_fe_sqr(&t, &rr);                                      /* t = R^2 (1) */
+    secp256k1_fe_mul(&r->z, &m, &z);                                /* r->z = M*Z (1) */
+    infinity = secp256k1_fe_normalizes_to_zero(&r->z) * (1 - a->infinity); /* result is infinity when M*Z is zero and a is finite; forced to 0 when a is infinity */
+    secp256k1_fe_mul_int(&r->z, 2 * (1 - a->infinity)); /* r->z = Z3 = 2*M*Z (2) */
+    r->x = t;                                           /* r->x = R^2 (1) */
+    secp256k1_fe_negate(&q, &q, 1);                     /* q = -Q (2) */
+    secp256k1_fe_add(&r->x, &q);                        /* r->x = R^2-Q (3) */
+    secp256k1_fe_normalize(&r->x);
+    secp256k1_fe_mul_int(&q, 3);                        /* q = -3*Q (6) */
+    secp256k1_fe_mul_int(&t, 2);                        /* t = 2*R^2 (2) */
+    secp256k1_fe_add(&t, &q);                           /* t = 2*R^2-3*Q (8) */
+    secp256k1_fe_mul(&t, &t, &rr);                      /* t = R*(2*R^2-3*Q) (1) */
+    secp256k1_fe_add(&t, &n);                           /* t = R*(2*R^2-3*Q)+M^4 (2) */
+    secp256k1_fe_negate(&r->y, &t, 2);                  /* r->y = R*(3*Q-2*R^2)-M^4 (3) */
+    secp256k1_fe_normalize_weak(&r->y);
+    secp256k1_fe_mul_int(&r->x, 4 * (1 - a->infinity)); /* r->x = X3 = 4*(R^2-Q) */
+    secp256k1_fe_mul_int(&r->y, 4 * (1 - a->infinity)); /* r->y = Y3 = 4*R*(3*Q-2*R^2)-4*M^4 (4) */
+
+    /** In case a->infinity == 1, the above code results in r->x, r->y, and r->z all equal to 0.
+     *  Replace r with b->x, b->y, 1 in that case.
+     */
+    secp256k1_fe_cmov(&r->x, &b->x, a->infinity);
+    secp256k1_fe_cmov(&r->y, &b->y, a->infinity);
+    secp256k1_fe_cmov(&r->z, &fe_1, a->infinity);
+    r->infinity = infinity;
+}
+
+static void secp256k1_gej_rescale(secp256k1_gej_t *r, const secp256k1_fe_t *s) { /* multiply r's coordinates by s^2, s^3 and s in place, so the affine X/z^2 and Y/z^3 are unchanged */
+    /* Operations: 4 mul, 1 sqr */
+    secp256k1_fe_t zz;
+    VERIFY_CHECK(!secp256k1_fe_is_zero(s));             /* s must be non-zero */
+    secp256k1_fe_sqr(&zz, s);                           /* zz = s^2 */
+    secp256k1_fe_mul(&r->x, &r->x, &zz);                /* r->x *= s^2 */
+    secp256k1_fe_mul(&r->y, &r->y, &zz);
+    secp256k1_fe_mul(&r->y, &r->y, s);                  /* r->y *= s^3 */
+    secp256k1_fe_mul(&r->z, &r->z, s);                  /* r->z *= s   */
+}
+
+static void secp256k1_ge_to_storage(secp256k1_ge_storage_t *r, const secp256k1_ge_t *a) { /* r = storage form of the finite affine point a */
+    secp256k1_fe_t x, y;
+    VERIFY_CHECK(!a->infinity); /* the storage type has no infinity flag, so infinity cannot be stored */
+    x = a->x; /* normalize copies so that a itself is left untouched */
+    secp256k1_fe_normalize(&x);
+    y = a->y;
+    secp256k1_fe_normalize(&y);
+    secp256k1_fe_to_storage(&r->x, &x); /* both coordinates are fully normalized before conversion */
+    secp256k1_fe_to_storage(&r->y, &y);
+}
+
+static void secp256k1_ge_from_storage(secp256k1_ge_t *r, const secp256k1_ge_storage_t *a) { /* r = affine point rebuilt from the storage form a */
+    secp256k1_fe_from_storage(&r->x, &a->x);
+    secp256k1_fe_from_storage(&r->y, &a->y);
+    r->infinity = 0; /* the storage type has no infinity flag; the result is always a finite point */
+}
+
+static SECP256K1_INLINE void secp256k1_ge_storage_cmov(secp256k1_ge_storage_t *r, const secp256k1_ge_storage_t *a, int flag) { /* conditional move of a into r, controlled by flag */
+    secp256k1_fe_storage_cmov(&r->x, &a->x, flag); /* the same flag drives both coordinates, so r is moved as a whole or not at all */
+    secp256k1_fe_storage_cmov(&r->y, &a->y, flag);
+}
+
+#ifdef USE_ENDOMORPHISM
+static void secp256k1_gej_mul_lambda(secp256k1_gej_t *r, const secp256k1_gej_t *a) { /* r = lambda*a, computed as a single field multiplication of x by beta */
+    static const secp256k1_fe_t beta = SECP256K1_FE_CONST(
+        0x7ae96a2bul, 0x657c0710ul, 0x6e64479eul, 0xac3434e9ul,
+        0x9cf04975ul, 0x12f58995ul, 0xc1396c28ul, 0x719501eeul
+    );
+    *r = *a; /* y, z and the infinity flag are carried over unchanged */
+    secp256k1_fe_mul(&r->x, &r->x, &beta); /* only x changes: x = x * beta */
+}
+#endif
+
+#endif
diff --git a/secp256k1/hash.h b/secp256k1/hash.h
new file mode 100644
index 000000000..843423d7f
--- /dev/null
+++ b/secp256k1/hash.h
@@ -0,0 +1,41 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_HASH_
+#define _SECP256K1_HASH_
+
+#include <stdlib.h>
+#include <stdint.h>
+
+typedef struct {
+    uint32_t s[32]; /* hash state words; NOTE(review): secp256k1_sha256_initialize seeds only s[0..7] - confirm whether 32 entries are really needed */
+    uint32_t buf[16]; /* In big endian */
+    size_t bytes; /* reset to 0 by secp256k1_sha256_initialize; presumably the number of bytes written so far */
+} secp256k1_sha256_t;
+
+static void secp256k1_sha256_initialize(secp256k1_sha256_t *hash);
+static void secp256k1_sha256_write(secp256k1_sha256_t *hash, const unsigned char *data, size_t size);
+static void secp256k1_sha256_finalize(secp256k1_sha256_t *hash, unsigned char *out32);
+
+typedef struct {
+    secp256k1_sha256_t inner, outer;
+} secp256k1_hmac_sha256_t;
+
+static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256_t *hash, const unsigned char *key, size_t size);
+static void secp256k1_hmac_sha256_write(secp256k1_hmac_sha256_t *hash, const unsigned char *data, size_t size);
+static void secp256k1_hmac_sha256_finalize(secp256k1_hmac_sha256_t *hash, unsigned char *out32);
+
+typedef struct {
+    unsigned char v[32];
+    unsigned char k[32];
+    int retry;
+} secp256k1_rfc6979_hmac_sha256_t;
+
+static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256_t *rng, const unsigned char *key, size_t keylen, const unsigned char *msg, size_t msglen, const unsigned char *rnd, size_t rndlen);
+static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256_t *rng, unsigned char *out, size_t outlen);
+static void secp256k1_rfc6979_hmac_sha256_finalize(secp256k1_rfc6979_hmac_sha256_t *rng);
+
+#endif
diff --git a/secp256k1/hash_impl.h b/secp256k1/hash_impl.h
new file mode 100644
index 000000000..9828827bc
--- /dev/null
+++ b/secp256k1/hash_impl.h
@@ -0,0 +1,293 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_HASH_IMPL_H_
+#define _SECP256K1_HASH_IMPL_H_
+
+#include "hash.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) /* Choose: bits of y where x is set, bits of z elsewhere. */
+#define Maj(x,y,z) (((x) & (y)) | ((z) & ((x) | (y)))) /* Majority: a bit is set when set in at least two inputs. */
+#define Sigma0(x) (((x) >> 2 | (x) << 30) ^ ((x) >> 13 | (x) << 19) ^ ((x) >> 22 | (x) << 10)) /* ROTR2 ^ ROTR13 ^ ROTR22 for a 32-bit x. */
+#define Sigma1(x) (((x) >> 6 | (x) << 26) ^ ((x) >> 11 | (x) << 21) ^ ((x) >> 25 | (x) << 7)) /* ROTR6 ^ ROTR11 ^ ROTR25 for a 32-bit x. */
+#define sigma0(x) (((x) >> 7 | (x) << 25) ^ ((x) >> 18 | (x) << 14) ^ ((x) >> 3)) /* ROTR7 ^ ROTR18 ^ SHR3 for a 32-bit x. */
+#define sigma1(x) (((x) >> 17 | (x) << 15) ^ ((x) >> 19 | (x) << 13) ^ ((x) >> 10)) /* ROTR17 ^ ROTR19 ^ SHR10 for a 32-bit x. */
+
+#define Round(a,b,c,d,e,f,g,h,k,w) do { \
+    uint32_t t1 = (h) + Sigma1(e) + Ch((e), (f), (g)) + (k) + (w); /* k: round constant, w: message schedule word */ \
+    uint32_t t2 = Sigma0(a) + Maj((a), (b), (c)); \
+    (d) += t1; /* only d and h change; callers rotate the argument order instead of moving the variables */ \
+    (h) = t1 + t2; \
+} while(0)
+
+#ifdef WORDS_BIGENDIAN /* Words are already big endian in memory: no conversion needed. */
+#define BE32(x) (x)
+#else /* Otherwise reverse the four bytes of a 32-bit word. */
+#define BE32(p) ((((p) & 0xFF) << 24) | (((p) & 0xFF00) << 8) | (((p) & 0xFF0000) >> 8) | (((p) & 0xFF000000) >> 24))
+#endif
+
+static void secp256k1_sha256_initialize(secp256k1_sha256_t *hash) {
+    static const uint32_t initial_state[8] = { /* SHA-256 initial hash value H(0), FIPS 180-4 section 5.3.3 */
+        0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul,
+        0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul
+    };
+    int i;
+    for (i = 0; i < 8; i++) {
+        hash->s[i] = initial_state[i];
+    }
+    hash->bytes = 0; /* nothing has been written yet */
+}
+
+/** Apply the SHA-256 compression function to the state s[0..7], consuming one chunk of 16 big endian 32-bit words. */
+static void secp256k1_sha256_transform(uint32_t* s, const uint32_t* chunk) {
+    uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7];
+    uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
+    /* Rounds 0-15: the schedule words are the sixteen chunk words, converted with BE32. */
+    Round(a, b, c, d, e, f, g, h, 0x428a2f98, w0 = BE32(chunk[0]));
+    Round(h, a, b, c, d, e, f, g, 0x71374491, w1 = BE32(chunk[1]));
+    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w2 = BE32(chunk[2]));
+    Round(f, g, h, a, b, c, d, e, 0xe9b5dba5, w3 = BE32(chunk[3]));
+    Round(e, f, g, h, a, b, c, d, 0x3956c25b, w4 = BE32(chunk[4]));
+    Round(d, e, f, g, h, a, b, c, 0x59f111f1, w5 = BE32(chunk[5]));
+    Round(c, d, e, f, g, h, a, b, 0x923f82a4, w6 = BE32(chunk[6]));
+    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5, w7 = BE32(chunk[7]));
+    Round(a, b, c, d, e, f, g, h, 0xd807aa98, w8 = BE32(chunk[8]));
+    Round(h, a, b, c, d, e, f, g, 0x12835b01, w9 = BE32(chunk[9]));
+    Round(g, h, a, b, c, d, e, f, 0x243185be, w10 = BE32(chunk[10]));
+    Round(f, g, h, a, b, c, d, e, 0x550c7dc3, w11 = BE32(chunk[11]));
+    Round(e, f, g, h, a, b, c, d, 0x72be5d74, w12 = BE32(chunk[12]));
+    Round(d, e, f, g, h, a, b, c, 0x80deb1fe, w13 = BE32(chunk[13]));
+    Round(c, d, e, f, g, h, a, b, 0x9bdc06a7, w14 = BE32(chunk[14]));
+    Round(b, c, d, e, f, g, h, a, 0xc19bf174, w15 = BE32(chunk[15]));
+    /* Rounds 16-31: each schedule word is extended in place from the previous sixteen. */
+    Round(a, b, c, d, e, f, g, h, 0xe49b69c1, w0 += sigma1(w14) + w9 + sigma0(w1));
+    Round(h, a, b, c, d, e, f, g, 0xefbe4786, w1 += sigma1(w15) + w10 + sigma0(w2));
+    Round(g, h, a, b, c, d, e, f, 0x0fc19dc6, w2 += sigma1(w0) + w11 + sigma0(w3));
+    Round(f, g, h, a, b, c, d, e, 0x240ca1cc, w3 += sigma1(w1) + w12 + sigma0(w4));
+    Round(e, f, g, h, a, b, c, d, 0x2de92c6f, w4 += sigma1(w2) + w13 + sigma0(w5));
+    Round(d, e, f, g, h, a, b, c, 0x4a7484aa, w5 += sigma1(w3) + w14 + sigma0(w6));
+    Round(c, d, e, f, g, h, a, b, 0x5cb0a9dc, w6 += sigma1(w4) + w15 + sigma0(w7));
+    Round(b, c, d, e, f, g, h, a, 0x76f988da, w7 += sigma1(w5) + w0 + sigma0(w8));
+    Round(a, b, c, d, e, f, g, h, 0x983e5152, w8 += sigma1(w6) + w1 + sigma0(w9));
+    Round(h, a, b, c, d, e, f, g, 0xa831c66d, w9 += sigma1(w7) + w2 + sigma0(w10));
+    Round(g, h, a, b, c, d, e, f, 0xb00327c8, w10 += sigma1(w8) + w3 + sigma0(w11));
+    Round(f, g, h, a, b, c, d, e, 0xbf597fc7, w11 += sigma1(w9) + w4 + sigma0(w12));
+    Round(e, f, g, h, a, b, c, d, 0xc6e00bf3, w12 += sigma1(w10) + w5 + sigma0(w13));
+    Round(d, e, f, g, h, a, b, c, 0xd5a79147, w13 += sigma1(w11) + w6 + sigma0(w14));
+    Round(c, d, e, f, g, h, a, b, 0x06ca6351, w14 += sigma1(w12) + w7 + sigma0(w15));
+    Round(b, c, d, e, f, g, h, a, 0x14292967, w15 += sigma1(w13) + w8 + sigma0(w0));
+    /* Rounds 32-47: same in-place schedule extension. */
+    Round(a, b, c, d, e, f, g, h, 0x27b70a85, w0 += sigma1(w14) + w9 + sigma0(w1));
+    Round(h, a, b, c, d, e, f, g, 0x2e1b2138, w1 += sigma1(w15) + w10 + sigma0(w2));
+    Round(g, h, a, b, c, d, e, f, 0x4d2c6dfc, w2 += sigma1(w0) + w11 + sigma0(w3));
+    Round(f, g, h, a, b, c, d, e, 0x53380d13, w3 += sigma1(w1) + w12 + sigma0(w4));
+    Round(e, f, g, h, a, b, c, d, 0x650a7354, w4 += sigma1(w2) + w13 + sigma0(w5));
+    Round(d, e, f, g, h, a, b, c, 0x766a0abb, w5 += sigma1(w3) + w14 + sigma0(w6));
+    Round(c, d, e, f, g, h, a, b, 0x81c2c92e, w6 += sigma1(w4) + w15 + sigma0(w7));
+    Round(b, c, d, e, f, g, h, a, 0x92722c85, w7 += sigma1(w5) + w0 + sigma0(w8));
+    Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1, w8 += sigma1(w6) + w1 + sigma0(w9));
+    Round(h, a, b, c, d, e, f, g, 0xa81a664b, w9 += sigma1(w7) + w2 + sigma0(w10));
+    Round(g, h, a, b, c, d, e, f, 0xc24b8b70, w10 += sigma1(w8) + w3 + sigma0(w11));
+    Round(f, g, h, a, b, c, d, e, 0xc76c51a3, w11 += sigma1(w9) + w4 + sigma0(w12));
+    Round(e, f, g, h, a, b, c, d, 0xd192e819, w12 += sigma1(w10) + w5 + sigma0(w13));
+    Round(d, e, f, g, h, a, b, c, 0xd6990624, w13 += sigma1(w11) + w6 + sigma0(w14));
+    Round(c, d, e, f, g, h, a, b, 0xf40e3585, w14 += sigma1(w12) + w7 + sigma0(w15));
+    Round(b, c, d, e, f, g, h, a, 0x106aa070, w15 += sigma1(w13) + w8 + sigma0(w0));
+    /* Rounds 48-63: the last two rounds use the schedule value without storing it, as it is never read again. */
+    Round(a, b, c, d, e, f, g, h, 0x19a4c116, w0 += sigma1(w14) + w9 + sigma0(w1));
+    Round(h, a, b, c, d, e, f, g, 0x1e376c08, w1 += sigma1(w15) + w10 + sigma0(w2));
+    Round(g, h, a, b, c, d, e, f, 0x2748774c, w2 += sigma1(w0) + w11 + sigma0(w3));
+    Round(f, g, h, a, b, c, d, e, 0x34b0bcb5, w3 += sigma1(w1) + w12 + sigma0(w4));
+    Round(e, f, g, h, a, b, c, d, 0x391c0cb3, w4 += sigma1(w2) + w13 + sigma0(w5));
+    Round(d, e, f, g, h, a, b, c, 0x4ed8aa4a, w5 += sigma1(w3) + w14 + sigma0(w6));
+    Round(c, d, e, f, g, h, a, b, 0x5b9cca4f, w6 += sigma1(w4) + w15 + sigma0(w7));
+    Round(b, c, d, e, f, g, h, a, 0x682e6ff3, w7 += sigma1(w5) + w0 + sigma0(w8));
+    Round(a, b, c, d, e, f, g, h, 0x748f82ee, w8 += sigma1(w6) + w1 + sigma0(w9));
+    Round(h, a, b, c, d, e, f, g, 0x78a5636f, w9 += sigma1(w7) + w2 + sigma0(w10));
+    Round(g, h, a, b, c, d, e, f, 0x84c87814, w10 += sigma1(w8) + w3 + sigma0(w11));
+    Round(f, g, h, a, b, c, d, e, 0x8cc70208, w11 += sigma1(w9) + w4 + sigma0(w12));
+    Round(e, f, g, h, a, b, c, d, 0x90befffa, w12 += sigma1(w10) + w5 + sigma0(w13));
+    Round(d, e, f, g, h, a, b, c, 0xa4506ceb, w13 += sigma1(w11) + w6 + sigma0(w14));
+    Round(c, d, e, f, g, h, a, b, 0xbef9a3f7, w14 + sigma1(w12) + w7 + sigma0(w15));
+    Round(b, c, d, e, f, g, h, a, 0xc67178f2, w15 + sigma1(w13) + w8 + sigma0(w0));
+    /* Add the working variables back into the state. */
+    s[0] += a;
+    s[1] += b;
+    s[2] += c;
+    s[3] += d;
+    s[4] += e;
+    s[5] += f;
+    s[6] += g;
+    s[7] += h;
+}
+
+static void secp256k1_sha256_write(secp256k1_sha256_t *hash, const unsigned char *data, size_t len) {
+    unsigned char *block = (unsigned char*)hash->buf;
+    size_t used = hash->bytes % 64; /* bytes already waiting in the block buffer */
+    hash->bytes += len;
+    for (; used + len >= 64; used = 0) {
+        /* Top the block up to 64 bytes and compress it. */
+        size_t take = 64 - used;
+        memcpy(block + used, data, take);
+        data += take;
+        len -= take;
+        secp256k1_sha256_transform(hash->s, hash->buf);
+    }
+    if (len > 0) {
+        memcpy(block + used, data, len); /* keep the tail for a later write */
+    }
+}
+
+static void secp256k1_sha256_finalize(secp256k1_sha256_t *hash, unsigned char *out32) {
+    static const unsigned char padding[64] = {0x80}; /* 0x80 marker, remaining bytes are zero */
+    uint32_t bitlen[2], digest[8];
+    int word;
+    /* Record the message length in bits (two big endian words) before the padding write changes hash->bytes. */
+    bitlen[0] = BE32(hash->bytes >> 29);
+    bitlen[1] = BE32(hash->bytes << 3);
+    secp256k1_sha256_write(hash, padding, 1 + ((119 - (hash->bytes % 64)) % 64));
+    secp256k1_sha256_write(hash, (const unsigned char*)bitlen, 8);
+    for (word = 0; word < 8; word++) {
+        digest[word] = BE32(hash->s[word]);
+        hash->s[word] = 0;
+    }
+    memcpy(out32, digest, 32);
+}
+
+static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256_t *hash, const unsigned char *key, size_t keylen) {
+    unsigned char block[64];
+    int i;
+    /* Build the 64-byte key block: keys longer than 64 bytes are replaced by their SHA-256, the rest stays zero. */
+    memset(block, 0, sizeof(block));
+    if (keylen > 64) {
+        secp256k1_sha256_t keyhash;
+        secp256k1_sha256_initialize(&keyhash);
+        secp256k1_sha256_write(&keyhash, key, keylen);
+        secp256k1_sha256_finalize(&keyhash, block);
+    } else {
+        memcpy(block, key, keylen);
+    }
+
+    secp256k1_sha256_initialize(&hash->outer);
+    for (i = 0; i < 64; i++) {
+        block[i] ^= 0x5c; /* outer pad */
+    }
+    secp256k1_sha256_write(&hash->outer, block, 64);
+
+    secp256k1_sha256_initialize(&hash->inner);
+    for (i = 0; i < 64; i++) {
+        block[i] ^= 0x5c ^ 0x36; /* undo the outer pad and apply the inner pad in one step */
+    }
+    secp256k1_sha256_write(&hash->inner, block, 64);
+    memset(block, 0, 64);
+}
+/* Feed `size` bytes of message data into the HMAC; only the inner hash absorbs them. */
+static void secp256k1_hmac_sha256_write(secp256k1_hmac_sha256_t *hash, const unsigned char *data, size_t size) {
+    secp256k1_sha256_write(&hash->inner, data, size);
+}
+
+static void secp256k1_hmac_sha256_finalize(secp256k1_hmac_sha256_t *hash, unsigned char *out32) {
+    unsigned char inner_digest[32]; /* result of the inner hash, fed to the outer hash and then wiped */
+    secp256k1_sha256_finalize(&hash->inner, inner_digest);
+    secp256k1_sha256_write(&hash->outer, inner_digest, sizeof(inner_digest));
+    memset(inner_digest, 0, sizeof(inner_digest));
+    secp256k1_sha256_finalize(&hash->outer, out32);
+}
+
+
+static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256_t *rng, const unsigned char *key, size_t keylen, const unsigned char *msg, size_t msglen, const unsigned char *rnd, size_t rndlen) {
+    /*
+     * Seed the generator state (rng->k, rng->v) following RFC6979 3.2:
+     *   - key, then msg, then rnd (only when rnd is non-NULL and rndlen is
+     *     nonzero) are mixed into K;
+     *   - that mixing pass runs twice, first with a 0x00 separator byte
+     *     (step 3.2.d) and then with a 0x01 separator byte (step 3.2.f);
+     *   - after each pass V is refreshed as HMAC_K(V).
+     */
+    static const unsigned char separator[2] = {0x00, 0x01};
+    secp256k1_hmac_sha256_t hmac;
+    int pass;
+
+    memset(rng->v, 0x01, 32); /* RFC6979 3.2.b. */
+    memset(rng->k, 0x00, 32); /* RFC6979 3.2.c. */
+
+    /* Two mixing passes that differ only in the separator byte. */
+    for (pass = 0; pass < 2; pass++) {
+        /* K = HMAC_K(V || separator || key || msg [|| rnd]) */
+        secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
+        secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
+        secp256k1_hmac_sha256_write(&hmac, &separator[pass], 1);
+        secp256k1_hmac_sha256_write(&hmac, key, keylen);
+        secp256k1_hmac_sha256_write(&hmac, msg, msglen);
+        if (rnd && rndlen) {
+            /* RFC6979 3.6 "Additional data". */
+            secp256k1_hmac_sha256_write(&hmac, rnd, rndlen);
+        }
+        secp256k1_hmac_sha256_finalize(&hmac, rng->k);
+
+        /* V = HMAC_K(V) */
+        secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
+        secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
+        secp256k1_hmac_sha256_finalize(&hmac, rng->v);
+    }
+
+    /* Nothing generated yet, so the first generate() call skips the reseed. */
+    rng->retry = 0;
+}
+
+static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256_t *rng, unsigned char *out, size_t outlen) {
+    /* RFC6979 3.2.h. Writes outlen bytes to out; any call after the first reseeds K and V before generating. */
+    static const unsigned char zero[1] = {0x00};
+    if (rng->retry) {
+        secp256k1_hmac_sha256_t hmac;
+        secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
+        secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
+        secp256k1_hmac_sha256_write(&hmac, zero, 1);
+        secp256k1_hmac_sha256_finalize(&hmac, rng->k);
+        secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
+        secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
+        secp256k1_hmac_sha256_finalize(&hmac, rng->v);
+    }
+    /* Each pass sets V = HMAC_K(V) and emits up to 32 bytes of it. */
+    while (outlen > 0) {
+        secp256k1_hmac_sha256_t hmac;
+        size_t now = outlen; /* size_t rather than int: a large outlen must not truncate or turn negative */
+        secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
+        secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
+        secp256k1_hmac_sha256_finalize(&hmac, rng->v);
+        if (now > 32) {
+            now = 32;
+        }
+        memcpy(out, rng->v, now);
+        out += now;
+        outlen -= now;
+    }
+
+    rng->retry = 1;
+}
+
+static void secp256k1_rfc6979_hmac_sha256_finalize(secp256k1_rfc6979_hmac_sha256_t *rng) {
+    rng->retry = 0;
+    memset(rng->v, 0, sizeof(rng->v)); /* wipe V and K so they do not linger in memory */
+    memset(rng->k, 0, sizeof(rng->k));
+}
+
+
+/* Remove the file-local helper macros so they do not leak into files that include this one. */
+#undef BE32
+#undef Round
+#undef sigma0
+#undef sigma1
+#undef Sigma0
+#undef Sigma1
+#undef Ch
+#undef Maj
+
+#endif
diff --git a/secp256k1/impl/ecdsa.h b/secp256k1/impl/ecdsa.h
deleted file mode 100644
index b001e263f..000000000
--- a/secp256k1/impl/ecdsa.h
+++ /dev/null
@@ -1,307 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_ECDSA_IMPL_H_
-#define _SECP256K1_ECDSA_IMPL_H_
-
-#include "../num.h"
-#include "../field.h"
-#include "../group.h"
-#include "../ecmult.h"
-#include "../ecdsa.h"
-
-void static secp256k1_ecdsa_sig_init(secp256k1_ecdsa_sig_t *r) {
-    secp256k1_num_init(&r->r);
-    secp256k1_num_init(&r->s);
-}
-
-void static secp256k1_ecdsa_sig_free(secp256k1_ecdsa_sig_t *r) {
-    secp256k1_num_free(&r->r);
-    secp256k1_num_free(&r->s);
-}
-
-int static secp256k1_ecdsa_pubkey_parse(secp256k1_ge_t *elem, const unsigned char *pub, int size) {
-    if (size == 33 && (pub[0] == 0x02 || pub[0] == 0x03)) {
-        secp256k1_fe_t x;
-        secp256k1_fe_set_b32(&x, pub+1);
-        secp256k1_ge_set_xo(elem, &x, pub[0] == 0x03);
-    } else if (size == 65 && (pub[0] == 0x04 || pub[0] == 0x06 || pub[0] == 0x07)) {
-        secp256k1_fe_t x, y;
-        secp256k1_fe_set_b32(&x, pub+1);
-        secp256k1_fe_set_b32(&y, pub+33);
-        secp256k1_ge_set_xy(elem, &x, &y);
-        if ((pub[0] == 0x06 || pub[0] == 0x07) && secp256k1_fe_is_odd(&y) != (pub[0] == 0x07))
-            return 0;
-    } else {
-        return 0;
-    }
-    return secp256k1_ge_is_valid(elem);
-}
-
-int static secp256k1_ecdsa_sig_parse(secp256k1_ecdsa_sig_t *r, const unsigned char *sig, int size) {
-    if (sig[0] != 0x30) return 0;
-    int lenr = sig[3];
-    if (5+lenr >= size) return 0;
-    int lens = sig[lenr+5];
-    if (sig[1] != lenr+lens+4) return 0;
-    if (lenr+lens+6 > size) return 0;
-    if (sig[2] != 0x02) return 0;
-    if (lenr == 0) return 0;
-    if (sig[lenr+4] != 0x02) return 0;
-    if (lens == 0) return 0;
-    secp256k1_num_set_bin(&r->r, sig+4, lenr);
-    secp256k1_num_set_bin(&r->s, sig+6+lenr, lens);
-    return 1;
-}
-
-int static secp256k1_ecdsa_sig_serialize(unsigned char *sig, int *size, const secp256k1_ecdsa_sig_t *a) {
-    int lenR = (secp256k1_num_bits(&a->r) + 7)/8;
-    if (lenR == 0 || secp256k1_num_get_bit(&a->r, lenR*8-1))
-        lenR++;
-    int lenS = (secp256k1_num_bits(&a->s) + 7)/8;
-    if (lenS == 0 || secp256k1_num_get_bit(&a->s, lenS*8-1))
-        lenS++;
-    if (*size < 6+lenS+lenR)
-        return 0;
-    *size = 6 + lenS + lenR;
-    sig[0] = 0x30;
-    sig[1] = 4 + lenS + lenR;
-    sig[2] = 0x02;
-    sig[3] = lenR;
-    secp256k1_num_get_bin(sig+4, lenR, &a->r);
-    sig[4+lenR] = 0x02;
-    sig[5+lenR] = lenS;
-    secp256k1_num_get_bin(sig+lenR+6, lenS, &a->s);
-    return 1;
-}
-
-int static secp256k1_ecdsa_sig_recompute(secp256k1_num_t *r2, const secp256k1_ecdsa_sig_t *sig, const secp256k1_ge_t *pubkey, const secp256k1_num_t *message) {
-    const secp256k1_ge_consts_t *c = secp256k1_ge_consts;
-
-    if (secp256k1_num_is_neg(&sig->r) || secp256k1_num_is_neg(&sig->s))
-        return 0;
-    if (secp256k1_num_is_zero(&sig->r) || secp256k1_num_is_zero(&sig->s))
-        return 0;
-    if (secp256k1_num_cmp(&sig->r, &c->order) >= 0 || secp256k1_num_cmp(&sig->s, &c->order) >= 0)
-        return 0;
-
-    int ret = 0;
-    secp256k1_num_t sn, u1, u2;
-    secp256k1_num_init(&sn);
-    secp256k1_num_init(&u1);
-    secp256k1_num_init(&u2);
-    secp256k1_num_mod_inverse(&sn, &sig->s, &c->order);
-    secp256k1_num_mod_mul(&u1, &sn, message, &c->order);
-    secp256k1_num_mod_mul(&u2, &sn, &sig->r, &c->order);
-    secp256k1_gej_t pubkeyj; secp256k1_gej_set_ge(&pubkeyj, pubkey);
-    secp256k1_gej_t pr; secp256k1_ecmult(&pr, &pubkeyj, &u2, &u1);
-    if (!secp256k1_gej_is_infinity(&pr)) {
-        secp256k1_fe_t xr; secp256k1_gej_get_x(&xr, &pr);
-        secp256k1_fe_normalize(&xr);
-        unsigned char xrb[32]; secp256k1_fe_get_b32(xrb, &xr);
-        secp256k1_num_set_bin(r2, xrb, 32);
-        secp256k1_num_mod(r2, &c->order);
-        ret = 1;
-    }
-    secp256k1_num_free(&sn);
-    secp256k1_num_free(&u1);
-    secp256k1_num_free(&u2);
-    return ret;
-}
-
-int static secp256k1_ecdsa_sig_recover(const secp256k1_ecdsa_sig_t *sig, secp256k1_ge_t *pubkey, const secp256k1_num_t *message, int recid) {
-    const secp256k1_ge_consts_t *c = secp256k1_ge_consts;
-
-    if (secp256k1_num_is_neg(&sig->r) || secp256k1_num_is_neg(&sig->s))
-        return 0;
-    if (secp256k1_num_is_zero(&sig->r) || secp256k1_num_is_zero(&sig->s))
-        return 0;
-    if (secp256k1_num_cmp(&sig->r, &c->order) >= 0 || secp256k1_num_cmp(&sig->s, &c->order) >= 0)
-        return 0;
-
-    secp256k1_num_t rx;
-    secp256k1_num_init(&rx);
-    secp256k1_num_copy(&rx, &sig->r);
-    if (recid & 2) {
-        secp256k1_num_add(&rx, &rx, &c->order);
-        if (secp256k1_num_cmp(&rx, &secp256k1_fe_consts->p) >= 0)
-            return 0;
-    }
-    unsigned char brx[32];
-    secp256k1_num_get_bin(brx, 32, &rx);
-    secp256k1_num_free(&rx);
-    secp256k1_fe_t fx;
-    secp256k1_fe_set_b32(&fx, brx);
-    secp256k1_ge_t x;
-    secp256k1_ge_set_xo(&x, &fx, recid & 1);
-    if (!secp256k1_ge_is_valid(&x))
-        return 0;
-    secp256k1_gej_t xj;
-    secp256k1_gej_set_ge(&xj, &x);
-    secp256k1_num_t rn, u1, u2;
-    secp256k1_num_init(&rn);
-    secp256k1_num_init(&u1);
-    secp256k1_num_init(&u2);
-    secp256k1_num_mod_inverse(&rn, &sig->r, &c->order);
-    secp256k1_num_mod_mul(&u1, &rn, message, &c->order);
-    secp256k1_num_sub(&u1, &c->order, &u1);
-    secp256k1_num_mod_mul(&u2, &rn, &sig->s, &c->order);
-    secp256k1_gej_t qj;
-    secp256k1_ecmult(&qj, &xj, &u2, &u1);
-    secp256k1_ge_set_gej(pubkey, &qj);
-    secp256k1_num_free(&rn);
-    secp256k1_num_free(&u1);
-    secp256k1_num_free(&u2);
-    return 1;
-}
-
-int static secp256k1_ecdsa_sig_verify(const secp256k1_ecdsa_sig_t *sig, const secp256k1_ge_t *pubkey, const secp256k1_num_t *message) {
-    secp256k1_num_t r2;
-    secp256k1_num_init(&r2);
-    int ret = 0;
-    ret = secp256k1_ecdsa_sig_recompute(&r2, sig, pubkey, message) && secp256k1_num_cmp(&sig->r, &r2) == 0;
-    secp256k1_num_free(&r2);
-    return ret;
-}
-
-int static secp256k1_ecdsa_sig_sign(secp256k1_ecdsa_sig_t *sig, const secp256k1_num_t *seckey, const secp256k1_num_t *message, const secp256k1_num_t *nonce, int *recid) {
-    const secp256k1_ge_consts_t *c = secp256k1_ge_consts;
-
-    secp256k1_gej_t rp;
-    secp256k1_ecmult_gen(&rp, nonce);
-    secp256k1_ge_t r;
-    secp256k1_ge_set_gej(&r, &rp);
-    unsigned char b[32];
-    secp256k1_fe_normalize(&r.x);
-    secp256k1_fe_normalize(&r.y);
-    secp256k1_fe_get_b32(b, &r.x);
-    secp256k1_num_set_bin(&sig->r, b, 32);
-    if (recid)
-        *recid = (secp256k1_num_cmp(&sig->r, &c->order) >= 0 ? 2 : 0) | (secp256k1_fe_is_odd(&r.y) ? 1 : 0);
-    secp256k1_num_mod(&sig->r, &c->order);
-    secp256k1_num_t n;
-    secp256k1_num_init(&n);
-    secp256k1_num_mod_mul(&n, &sig->r, seckey, &c->order);
-    secp256k1_num_add(&n, &n, message);
-    secp256k1_num_mod(&n, &c->order);
-    secp256k1_num_mod_inverse(&sig->s, nonce, &c->order);
-    secp256k1_num_mod_mul(&sig->s, &sig->s, &n, &c->order);
-    secp256k1_num_free(&n);
-    if (secp256k1_num_is_zero(&sig->s))
-        return 0;
-    if (secp256k1_num_cmp(&sig->s, &c->half_order) > 0) {
-        secp256k1_num_sub(&sig->s, &c->order, &sig->s);
-        if (recid)
-            *recid ^= 1;
-    }
-    return 1;
-}
-
-void static secp256k1_ecdsa_sig_set_rs(secp256k1_ecdsa_sig_t *sig, const secp256k1_num_t *r, const secp256k1_num_t *s) {
-    secp256k1_num_copy(&sig->r, r);
-    secp256k1_num_copy(&sig->s, s);
-}
-
-void static secp256k1_ecdsa_pubkey_serialize(secp256k1_ge_t *elem, unsigned char *pub, int *size, int compressed) {
-    secp256k1_fe_normalize(&elem->x);
-    secp256k1_fe_normalize(&elem->y);
-    secp256k1_fe_get_b32(&pub[1], &elem->x);
-    if (compressed) {
-        *size = 33;
-        pub[0] = 0x02 | (secp256k1_fe_is_odd(&elem->y) ? 0x01 : 0x00);
-    } else {
-        *size = 65;
-        pub[0] = 0x04;
-        secp256k1_fe_get_b32(&pub[33], &elem->y);
-    }
-}
-
-int static secp256k1_ecdsa_privkey_parse(secp256k1_num_t *key, const unsigned char *privkey, int privkeylen) {
-    const unsigned char *end = privkey + privkeylen;
-    // sequence header
-    if (end < privkey+1 || *privkey != 0x30)
-        return 0;
-    privkey++;
-    // sequence length constructor
-    int lenb = 0;
-    if (end < privkey+1 || !(*privkey & 0x80))
-        return 0;
-    lenb = *privkey & ~0x80; privkey++;
-    if (lenb < 1 || lenb > 2)
-        return 0;
-    if (end < privkey+lenb)
-        return 0;
-    // sequence length
-    int len = 0;
-    len = privkey[lenb-1] | (lenb > 1 ? privkey[lenb-2] << 8 : 0);
-    privkey += lenb;
-    if (end < privkey+len)
-        return 0;
-    // sequence element 0: version number (=1)
-    if (end < privkey+3 || privkey[0] != 0x02 || privkey[1] != 0x01 || privkey[2] != 0x01)
-        return 0;
-    privkey += 3;
-    // sequence element 1: octet string, up to 32 bytes
-    if (end < privkey+2 || privkey[0] != 0x04 || privkey[1] > 0x20 || end < privkey+2+privkey[1])
-        return 0;
-    secp256k1_num_set_bin(key, privkey+2, privkey[1]);
-    return 1;
-}
-
-int static secp256k1_ecdsa_privkey_serialize(unsigned char *privkey, int *privkeylen, const secp256k1_num_t *key, int compressed) {
-    secp256k1_gej_t rp;
-    secp256k1_ecmult_gen(&rp, key);
-    secp256k1_ge_t r;
-    secp256k1_ge_set_gej(&r, &rp);
-    if (compressed) {
-        static const unsigned char begin[] = {
-            0x30,0x81,0xD3,0x02,0x01,0x01,0x04,0x20
-        };
-        static const unsigned char middle[] = {
-            0xA0,0x81,0x85,0x30,0x81,0x82,0x02,0x01,0x01,0x30,0x2C,0x06,0x07,0x2A,0x86,0x48,
-            0xCE,0x3D,0x01,0x01,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-            0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-            0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F,0x30,0x06,0x04,0x01,0x00,0x04,0x01,0x07,0x04,
-            0x21,0x02,0x79,0xBE,0x66,0x7E,0xF9,0xDC,0xBB,0xAC,0x55,0xA0,0x62,0x95,0xCE,0x87,
-            0x0B,0x07,0x02,0x9B,0xFC,0xDB,0x2D,0xCE,0x28,0xD9,0x59,0xF2,0x81,0x5B,0x16,0xF8,
-            0x17,0x98,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-            0xFF,0xFF,0xFF,0xFF,0xFE,0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,0xBF,0xD2,0x5E,
-            0x8C,0xD0,0x36,0x41,0x41,0x02,0x01,0x01,0xA1,0x24,0x03,0x22,0x00
-        };
-        unsigned char *ptr = privkey;
-        memcpy(ptr, begin, sizeof(begin)); ptr += sizeof(begin);
-        secp256k1_num_get_bin(ptr, 32, key); ptr += 32;
-        memcpy(ptr, middle, sizeof(middle)); ptr += sizeof(middle);
-        int pubkeylen = 0;
-        secp256k1_ecdsa_pubkey_serialize(&r, ptr, &pubkeylen, 1); ptr += pubkeylen;
-        *privkeylen = ptr - privkey;
-    } else {
-        static const unsigned char begin[] = {
-            0x30,0x82,0x01,0x13,0x02,0x01,0x01,0x04,0x20
-        };
-        static const unsigned char middle[] = {
-            0xA0,0x81,0xA5,0x30,0x81,0xA2,0x02,0x01,0x01,0x30,0x2C,0x06,0x07,0x2A,0x86,0x48,
-            0xCE,0x3D,0x01,0x01,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-            0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-            0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F,0x30,0x06,0x04,0x01,0x00,0x04,0x01,0x07,0x04,
-            0x41,0x04,0x79,0xBE,0x66,0x7E,0xF9,0xDC,0xBB,0xAC,0x55,0xA0,0x62,0x95,0xCE,0x87,
-            0x0B,0x07,0x02,0x9B,0xFC,0xDB,0x2D,0xCE,0x28,0xD9,0x59,0xF2,0x81,0x5B,0x16,0xF8,
-            0x17,0x98,0x48,0x3A,0xDA,0x77,0x26,0xA3,0xC4,0x65,0x5D,0xA4,0xFB,0xFC,0x0E,0x11,
-            0x08,0xA8,0xFD,0x17,0xB4,0x48,0xA6,0x85,0x54,0x19,0x9C,0x47,0xD0,0x8F,0xFB,0x10,
-            0xD4,0xB8,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-            0xFF,0xFF,0xFF,0xFF,0xFE,0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,0xBF,0xD2,0x5E,
-            0x8C,0xD0,0x36,0x41,0x41,0x02,0x01,0x01,0xA1,0x44,0x03,0x42,0x00
-        };
-        unsigned char *ptr = privkey;
-        memcpy(ptr, begin, sizeof(begin)); ptr += sizeof(begin);
-        secp256k1_num_get_bin(ptr, 32, key); ptr += 32;
-        memcpy(ptr, middle, sizeof(middle)); ptr += sizeof(middle);
-        int pubkeylen = 0;
-        secp256k1_ecdsa_pubkey_serialize(&r, ptr, &pubkeylen, 0); ptr += pubkeylen;
-        *privkeylen = ptr - privkey;
-    }
-    return 1;
-}
-
-#endif
diff --git a/secp256k1/impl/ecmult.h b/secp256k1/impl/ecmult.h
deleted file mode 100644
index c6711ac4c..000000000
--- a/secp256k1/impl/ecmult.h
+++ /dev/null
@@ -1,259 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_ECMULT_IMPL_H_
-#define _SECP256K1_ECMULT_IMPL_H_
-
-#include "../num.h"
-#include "../group.h"
-#include "../ecmult.h"
-
-// optimal for 128-bit and 256-bit exponents.
-#define WINDOW_A 5
-
-// larger numbers may result in slightly better performance, at the cost of
-// exponentially larger precomputed tables. WINDOW_G == 14 results in 640 KiB.
-#define WINDOW_G 14
-
-/** Fill a table 'pre' with precomputed odd multiples of a. W determines the size of the table.
- *  pre will contains the values [1*a,3*a,5*a,...,(2^(w-1)-1)*a], so it needs place for
- *  2^(w-2) entries.
- *
- *  There are two versions of this function:
- *  - secp256k1_ecmult_precomp_wnaf_gej, which operates on group elements in jacobian notation,
- *    fast to precompute, but slower to use in later additions.
- *  - secp256k1_ecmult_precomp_wnaf_ge, which operates on group elements in affine notations,
- *    (much) slower to precompute, but a bit faster to use in later additions.
- *  To compute a*P + b*G, we use the jacobian version for P, and the affine version for G, as
- *  G is constant, so it only needs to be done once in advance.
- */
-void static secp256k1_ecmult_table_precomp_gej(secp256k1_gej_t *pre, const secp256k1_gej_t *a, int w) {
-    pre[0] = *a;
-    secp256k1_gej_t d; secp256k1_gej_double(&d, &pre[0]);
-    for (int i=1; i<(1 << (w-2)); i++)
-        secp256k1_gej_add(&pre[i], &d, &pre[i-1]);
-}
-
-void static secp256k1_ecmult_table_precomp_ge(secp256k1_ge_t *pre, const secp256k1_ge_t *a, int w) {
-    pre[0] = *a;
-    secp256k1_gej_t x; secp256k1_gej_set_ge(&x, a);
-    secp256k1_gej_t d; secp256k1_gej_double(&d, &x);
-    for (int i=1; i<(1 << (w-2)); i++) {
-        secp256k1_gej_add_ge(&x, &d, &pre[i-1]);
-        secp256k1_ge_set_gej(&pre[i], &x);
-    }
-}
-
-/** The number of entries a table with precomputed multiples needs to have. */
-#define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
-
-/** The following two macro retrieves a particular odd multiple from a table
- *  of precomputed multiples. */
-#define ECMULT_TABLE_GET(r,pre,n,w,neg) do { \
-    assert(((n) & 1) == 1); \
-    assert((n) >= -((1 << ((w)-1)) - 1)); \
-    assert((n) <=  ((1 << ((w)-1)) - 1)); \
-    if ((n) > 0) \
-        *(r) = (pre)[((n)-1)/2]; \
-    else \
-        (neg)((r), &(pre)[(-(n)-1)/2]); \
-} while(0)
-
-#define ECMULT_TABLE_GET_GEJ(r,pre,n,w) ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_gej_neg)
-#define ECMULT_TABLE_GET_GE(r,pre,n,w)  ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_ge_neg)
-
-typedef struct {
-    secp256k1_ge_t pre_g[ECMULT_TABLE_SIZE(WINDOW_G)];    // odd multiples of the generator
-    secp256k1_ge_t pre_g_128[ECMULT_TABLE_SIZE(WINDOW_G)]; // odd multiples of 2^128*generator
-    secp256k1_ge_t prec[64][16]; // prec[j][i] = 16^j * (i+1) * G
-    secp256k1_ge_t fin; // -(sum(prec[j][0], j=0..63))
-} secp256k1_ecmult_consts_t;
-
-static const secp256k1_ecmult_consts_t *secp256k1_ecmult_consts = NULL;
-
-static void secp256k1_ecmult_start(void) {
-    if (secp256k1_ecmult_consts != NULL)
-        return;
-
-    secp256k1_ecmult_consts_t *ret = (secp256k1_ecmult_consts_t*)malloc(sizeof(secp256k1_ecmult_consts_t));
-    secp256k1_ecmult_consts = ret;
-
-    // get the generator
-    const secp256k1_ge_t *g = &secp256k1_ge_consts->g;
-
-    // calculate 2^128*generator
-    secp256k1_gej_t g_128j; secp256k1_gej_set_ge(&g_128j, g);
-    for (int i=0; i<128; i++)
-        secp256k1_gej_double(&g_128j, &g_128j);
-    secp256k1_ge_t g_128; secp256k1_ge_set_gej(&g_128, &g_128j);
-
-    // precompute the tables with odd multiples
-    secp256k1_ecmult_table_precomp_ge(ret->pre_g, g, WINDOW_G);
-    secp256k1_ecmult_table_precomp_ge(ret->pre_g_128, &g_128, WINDOW_G);
-
-    // compute prec and fin
-    secp256k1_gej_t gg; secp256k1_gej_set_ge(&gg, g);
-    secp256k1_ge_t ad = *g;
-    secp256k1_gej_t fn; secp256k1_gej_set_infinity(&fn);
-    for (int j=0; j<64; j++) {
-        secp256k1_ge_set_gej(&ret->prec[j][0], &gg);
-        secp256k1_gej_add(&fn, &fn, &gg);
-        for (int i=1; i<16; i++) {
-            secp256k1_gej_add_ge(&gg, &gg, &ad);
-            secp256k1_ge_set_gej(&ret->prec[j][i], &gg);
-        }
-        ad = ret->prec[j][15];
-    }
-    secp256k1_ge_set_gej(&ret->fin, &fn);
-    secp256k1_ge_neg(&ret->fin, &ret->fin);
-}
-
-static void secp256k1_ecmult_stop(void) {
-    if (secp256k1_ecmult_consts == NULL)
-        return;
-
-    secp256k1_ecmult_consts_t *c = (secp256k1_ecmult_consts_t*)secp256k1_ecmult_consts;
-    free(c);
-    secp256k1_ecmult_consts = NULL;
-}
-
-/** Convert a number to WNAF notation. The number becomes represented by sum(2^i * wnaf[i], i=0..bits),
- *  with the following guarantees:
- *  - each wnaf[i] is either 0, or an odd integer between -(1<<(w-1) - 1) and (1<<(w-1) - 1)
- *  - two non-zero entries in wnaf are separated by at least w-1 zeroes.
- *  - the index of the highest non-zero entry in wnaf (=return value-1) is at most bits, where
- *    bits is the number of bits necessary to represent the absolute value of the input.
- */
-static int secp256k1_ecmult_wnaf(int *wnaf, const secp256k1_num_t *a, int w) {
-    int ret = 0;
-    int zeroes = 0;
-    secp256k1_num_t x;
-    secp256k1_num_init(&x);
-    secp256k1_num_copy(&x, a);
-    int sign = 1;
-    if (secp256k1_num_is_neg(&x)) {
-        sign = -1;
-        secp256k1_num_negate(&x);
-    }
-    while (!secp256k1_num_is_zero(&x)) {
-        while (!secp256k1_num_is_odd(&x)) {
-            zeroes++;
-            secp256k1_num_shift(&x, 1);
-        }
-        int word = secp256k1_num_shift(&x, w);
-        while (zeroes) {
-            wnaf[ret++] = 0;
-            zeroes--;
-        }
-        if (word & (1 << (w-1))) {
-            secp256k1_num_inc(&x);
-            wnaf[ret++] = sign * (word - (1 << w));
-        } else {
-            wnaf[ret++] = sign * word;
-        }
-        zeroes = w-1;
-    }
-    secp256k1_num_free(&x);
-    return ret;
-}
-
-void static secp256k1_ecmult_gen(secp256k1_gej_t *r, const secp256k1_num_t *gn) {
-    secp256k1_num_t n;
-    secp256k1_num_init(&n);
-    secp256k1_num_copy(&n, gn);
-    const secp256k1_ecmult_consts_t *c = secp256k1_ecmult_consts;
-    secp256k1_gej_set_ge(r, &c->prec[0][secp256k1_num_shift(&n, 4)]);
-    for (int j=1; j<64; j++)
-        secp256k1_gej_add_ge(r, r, &c->prec[j][secp256k1_num_shift(&n, 4)]);
-    secp256k1_num_free(&n);
-    secp256k1_gej_add_ge(r, r, &c->fin);
-}
-
-void static secp256k1_ecmult(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_num_t *na, const secp256k1_num_t *ng) {
-    const secp256k1_ecmult_consts_t *c = secp256k1_ecmult_consts;
-
-#ifdef USE_ENDOMORPHISM
-    secp256k1_num_t na_1, na_lam;
-    secp256k1_num_init(&na_1);
-    secp256k1_num_init(&na_lam);
-    // split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit)
-    secp256k1_gej_split_exp(&na_1, &na_lam, na);
-
-    // build wnaf representation for na_1 and na_lam.
-    int wnaf_na_1[129];   int bits_na_1   = secp256k1_ecmult_wnaf(wnaf_na_1,   &na_1,   WINDOW_A);
-    int wnaf_na_lam[129]; int bits_na_lam = secp256k1_ecmult_wnaf(wnaf_na_lam, &na_lam, WINDOW_A);
-    int bits = bits_na_1;
-    if (bits_na_lam > bits) bits = bits_na_lam;
-
-    // calculate a_lam = a*lambda
-    secp256k1_gej_t a_lam; secp256k1_gej_mul_lambda(&a_lam, a);
-
-    // calculate odd multiples of a_lam
-    secp256k1_gej_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
-    secp256k1_ecmult_table_precomp_gej(pre_a_lam, &a_lam, WINDOW_A);
-#else
-    // build wnaf representation for na.
-    int wnaf_na[257];     int bits_na     = secp256k1_ecmult_wnaf(wnaf_na,     na,      WINDOW_A);
-    int bits = bits_na;
-#endif
-
-    // calculate odd multiples of a
-    secp256k1_gej_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
-    secp256k1_ecmult_table_precomp_gej(pre_a, a, WINDOW_A);
-
-    // Splitted G factors.
-    secp256k1_num_t ng_1, ng_128;
-    secp256k1_num_init(&ng_1);
-    secp256k1_num_init(&ng_128);
-
-    // split ng into ng_1 and ng_128 (where gn = gn_1 + gn_128*2^128, and gn_1 and gn_128 are ~128 bit)
-    secp256k1_num_split(&ng_1, &ng_128, ng, 128);
-
-    // Build wnaf representation for ng_1 and ng_128
-    int wnaf_ng_1[129];   int bits_ng_1   = secp256k1_ecmult_wnaf(wnaf_ng_1,   &ng_1,   WINDOW_G);
-    int wnaf_ng_128[129]; int bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, &ng_128, WINDOW_G);
-    if (bits_ng_1 > bits) bits = bits_ng_1;
-    if (bits_ng_128 > bits) bits = bits_ng_128;
-
-    secp256k1_gej_set_infinity(r);
-    secp256k1_gej_t tmpj;
-    secp256k1_ge_t tmpa;
-
-    for (int i=bits-1; i>=0; i--) {
-        secp256k1_gej_double(r, r);
-        int n;
-#ifdef USE_ENDOMORPHISM
-        if (i < bits_na_1 && (n = wnaf_na_1[i])) {
-            ECMULT_TABLE_GET_GEJ(&tmpj, pre_a, n, WINDOW_A);
-            secp256k1_gej_add(r, r, &tmpj);
-        }
-        if (i < bits_na_lam && (n = wnaf_na_lam[i])) {
-            ECMULT_TABLE_GET_GEJ(&tmpj, pre_a_lam, n, WINDOW_A);
-            secp256k1_gej_add(r, r, &tmpj);
-        }
-#else
-        if (i < bits_na && (n = wnaf_na[i])) {
-            ECMULT_TABLE_GET_GEJ(&tmpj, pre_a, n, WINDOW_A);
-            secp256k1_gej_add(r, r, &tmpj);
-        }
-#endif
-        if (i < bits_ng_1 && (n = wnaf_ng_1[i])) {
-            ECMULT_TABLE_GET_GE(&tmpa, c->pre_g, n, WINDOW_G);
-            secp256k1_gej_add_ge(r, r, &tmpa);
-        }
-        if (i < bits_ng_128 && (n = wnaf_ng_128[i])) {
-            ECMULT_TABLE_GET_GE(&tmpa, c->pre_g_128, n, WINDOW_G);
-            secp256k1_gej_add_ge(r, r, &tmpa);
-        }
-    }
-
-#ifdef USE_ENDOMORPHISM
-    secp256k1_num_free(&na_1);
-    secp256k1_num_free(&na_lam);
-#endif
-    secp256k1_num_free(&ng_1);
-    secp256k1_num_free(&ng_128);
-}
-
-#endif
diff --git a/secp256k1/impl/field.h b/secp256k1/impl/field.h
deleted file mode 100644
index c43188adc..000000000
--- a/secp256k1/impl/field.h
+++ /dev/null
@@ -1,173 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_FIELD_IMPL_H_
-#define _SECP256K1_FIELD_IMPL_H_
-
-#if defined(USE_FIELD_GMP)
-#include "field_gmp.h"
-#elif defined(USE_FIELD_10X26)
-#include "field_10x26.h"
-#elif defined(USE_FIELD_5X52)
-#include "field_5x52.h"
-#else
-#error "Please select field implementation"
-#endif
-
-void static secp256k1_fe_get_hex(char *r, int *rlen, const secp256k1_fe_t *a) {
-    if (*rlen < 65) {
-        *rlen = 65;
-        return;
-    }
-    *rlen = 65;
-    unsigned char tmp[32];
-    secp256k1_fe_t b = *a;
-    secp256k1_fe_normalize(&b);
-    secp256k1_fe_get_b32(tmp, &b);
-    for (int i=0; i<32; i++) {
-        static const char *c = "0123456789ABCDEF";
-        r[2*i]   = c[(tmp[i] >> 4) & 0xF];
-        r[2*i+1] = c[(tmp[i]) & 0xF];
-    }
-    r[64] = 0x00;
-}
-
-void static secp256k1_fe_set_hex(secp256k1_fe_t *r, const char *a, int alen) {
-    unsigned char tmp[32] = {};
-    static const int cvt[256] = {0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 1, 2, 3, 4, 5, 6,7,8,9,0,0,0,0,0,0,
-                                 0,10,11,12,13,14,15,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0,10,11,12,13,14,15,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0};
-    for (int i=0; i<32; i++) {
-        if (alen > i*2)
-            tmp[32 - alen/2 + i] = (cvt[(unsigned char)a[2*i]] << 4) + cvt[(unsigned char)a[2*i+1]];
-    }
-    secp256k1_fe_set_b32(r, tmp);
-}
-
-void static secp256k1_fe_sqrt(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
-    // calculate a^p, with p={15,780,1022,1023}
-    secp256k1_fe_t a2; secp256k1_fe_sqr(&a2, a);
-    secp256k1_fe_t a3; secp256k1_fe_mul(&a3, &a2, a);
-    secp256k1_fe_t a6; secp256k1_fe_sqr(&a6, &a3);
-    secp256k1_fe_t a12; secp256k1_fe_sqr(&a12, &a6);
-    secp256k1_fe_t a15; secp256k1_fe_mul(&a15, &a12, &a3);
-    secp256k1_fe_t a30; secp256k1_fe_sqr(&a30, &a15);
-    secp256k1_fe_t a60; secp256k1_fe_sqr(&a60, &a30);
-    secp256k1_fe_t a120; secp256k1_fe_sqr(&a120, &a60);
-    secp256k1_fe_t a240; secp256k1_fe_sqr(&a240, &a120);
-    secp256k1_fe_t a255; secp256k1_fe_mul(&a255, &a240, &a15);
-    secp256k1_fe_t a510; secp256k1_fe_sqr(&a510, &a255);
-    secp256k1_fe_t a750; secp256k1_fe_mul(&a750, &a510, &a240);
-    secp256k1_fe_t a780; secp256k1_fe_mul(&a780, &a750, &a30);
-    secp256k1_fe_t a1020; secp256k1_fe_sqr(&a1020, &a510);
-    secp256k1_fe_t a1022; secp256k1_fe_mul(&a1022, &a1020, &a2);
-    secp256k1_fe_t a1023; secp256k1_fe_mul(&a1023, &a1022, a);
-    secp256k1_fe_t x = a15;
-    for (int i=0; i<21; i++) {
-        for (int j=0; j<10; j++) secp256k1_fe_sqr(&x, &x);
-        secp256k1_fe_mul(&x, &x, &a1023);
-    }
-    for (int j=0; j<10; j++) secp256k1_fe_sqr(&x, &x);
-    secp256k1_fe_mul(&x, &x, &a1022);
-    for (int i=0; i<2; i++) {
-        for (int j=0; j<10; j++) secp256k1_fe_sqr(&x, &x);
-        secp256k1_fe_mul(&x, &x, &a1023);
-    }
-    for (int j=0; j<10; j++) secp256k1_fe_sqr(&x, &x);
-    secp256k1_fe_mul(r, &x, &a780);
-}
-
-void static secp256k1_fe_inv(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
-    // calculate a^p, with p={45,63,1019,1023}
-    secp256k1_fe_t a2; secp256k1_fe_sqr(&a2, a);
-    secp256k1_fe_t a3; secp256k1_fe_mul(&a3, &a2, a);
-    secp256k1_fe_t a4; secp256k1_fe_sqr(&a4, &a2);
-    secp256k1_fe_t a5; secp256k1_fe_mul(&a5, &a4, a);
-    secp256k1_fe_t a10; secp256k1_fe_sqr(&a10, &a5);
-    secp256k1_fe_t a11; secp256k1_fe_mul(&a11, &a10, a);
-    secp256k1_fe_t a21; secp256k1_fe_mul(&a21, &a11, &a10);
-    secp256k1_fe_t a42; secp256k1_fe_sqr(&a42, &a21);
-    secp256k1_fe_t a45; secp256k1_fe_mul(&a45, &a42, &a3);
-    secp256k1_fe_t a63; secp256k1_fe_mul(&a63, &a42, &a21);
-    secp256k1_fe_t a126; secp256k1_fe_sqr(&a126, &a63);
-    secp256k1_fe_t a252; secp256k1_fe_sqr(&a252, &a126);
-    secp256k1_fe_t a504; secp256k1_fe_sqr(&a504, &a252);
-    secp256k1_fe_t a1008; secp256k1_fe_sqr(&a1008, &a504);
-    secp256k1_fe_t a1019; secp256k1_fe_mul(&a1019, &a1008, &a11);
-    secp256k1_fe_t a1023; secp256k1_fe_mul(&a1023, &a1019, &a4);
-    secp256k1_fe_t x = a63;
-    for (int i=0; i<21; i++) {
-        for (int j=0; j<10; j++) secp256k1_fe_sqr(&x, &x);
-        secp256k1_fe_mul(&x, &x, &a1023);
-    }
-    for (int j=0; j<10; j++) secp256k1_fe_sqr(&x, &x);
-    secp256k1_fe_mul(&x, &x, &a1019);
-    for (int i=0; i<2; i++) {
-        for (int j=0; j<10; j++) secp256k1_fe_sqr(&x, &x);
-        secp256k1_fe_mul(&x, &x, &a1023);
-    }
-    for (int j=0; j<10; j++) secp256k1_fe_sqr(&x, &x);
-    secp256k1_fe_mul(r, &x, &a45);
-}
-
-void static secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
-#if defined(USE_FIELD_INV_BUILTIN)
-    secp256k1_fe_inv(r, a);
-#elif defined(USE_FIELD_INV_NUM)
-    unsigned char b[32];
-    secp256k1_fe_t c = *a;
-    secp256k1_fe_normalize(&c);
-    secp256k1_fe_get_b32(b, &c);
-    secp256k1_num_t n; 
-    secp256k1_num_init(&n);
-    secp256k1_num_set_bin(&n, b, 32);
-    secp256k1_num_mod_inverse(&n, &n, &secp256k1_fe_consts->p);
-    secp256k1_num_get_bin(b, 32, &n);
-    secp256k1_num_free(&n);
-    secp256k1_fe_set_b32(r, b);
-#else
-#error "Please select field inverse implementation"
-#endif
-}
-
-void static secp256k1_fe_start(void) {
-    static const unsigned char secp256k1_fe_consts_p[] = {
-        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-        0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F
-    };
-    if (secp256k1_fe_consts == NULL) {
-        secp256k1_fe_inner_start();
-        secp256k1_fe_consts_t *ret = (secp256k1_fe_consts_t*)malloc(sizeof(secp256k1_fe_consts_t));
-        secp256k1_num_init(&ret->p);
-        secp256k1_num_set_bin(&ret->p, secp256k1_fe_consts_p, sizeof(secp256k1_fe_consts_p));
-        secp256k1_fe_consts = ret;
-    }
-}
-
-void static secp256k1_fe_stop(void) {
-    if (secp256k1_fe_consts != NULL) {
-        secp256k1_fe_consts_t *c = (secp256k1_fe_consts_t*)secp256k1_fe_consts;
-        secp256k1_num_free(&c->p);
-        free((void*)c);
-        secp256k1_fe_consts = NULL;
-        secp256k1_fe_inner_stop();
-    }
-}
-
-#endif
diff --git a/secp256k1/impl/field_10x26.h b/secp256k1/impl/field_10x26.h
deleted file mode 100644
index 449769254..000000000
--- a/secp256k1/impl/field_10x26.h
+++ /dev/null
@@ -1,487 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_FIELD_REPR_IMPL_H_
-#define _SECP256K1_FIELD_REPR_IMPL_H_
-
-#include <stdio.h>
-#include <assert.h>
-#include <string.h>
-#include "../num.h"
-#include "../field.h"
-
-void static secp256k1_fe_inner_start(void) {}
-void static secp256k1_fe_inner_stop(void) {}
-
-void static secp256k1_fe_normalize(secp256k1_fe_t *r) {
-//    fog("normalize in: ", r);
-    uint32_t c;
-    c = r->n[0];
-    uint32_t t0 = c & 0x3FFFFFFUL;
-    c = (c >> 26) + r->n[1];
-    uint32_t t1 = c & 0x3FFFFFFUL;
-    c = (c >> 26) + r->n[2];
-    uint32_t t2 = c & 0x3FFFFFFUL;
-    c = (c >> 26) + r->n[3];
-    uint32_t t3 = c & 0x3FFFFFFUL;
-    c = (c >> 26) + r->n[4];
-    uint32_t t4 = c & 0x3FFFFFFUL;
-    c = (c >> 26) + r->n[5];
-    uint32_t t5 = c & 0x3FFFFFFUL;
-    c = (c >> 26) + r->n[6];
-    uint32_t t6 = c & 0x3FFFFFFUL;
-    c = (c >> 26) + r->n[7];
-    uint32_t t7 = c & 0x3FFFFFFUL;
-    c = (c >> 26) + r->n[8];
-    uint32_t t8 = c & 0x3FFFFFFUL;
-    c = (c >> 26) + r->n[9];
-    uint32_t t9 = c & 0x03FFFFFUL;
-    c >>= 22;
-/*    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
-    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
-    fog("         tm1: ", r);
-    fprintf(stderr, "out c= %08lx\n", (unsigned long)c);*/
-
-    // The following code will not modify the t's if c is initially 0.
-    uint32_t d = c * 0x3D1UL + t0;
-    t0 = d & 0x3FFFFFFULL;
-    d = (d >> 26) + t1 + c*0x40;
-    t1 = d & 0x3FFFFFFULL;
-    d = (d >> 26) + t2;
-    t2 = d & 0x3FFFFFFULL;
-    d = (d >> 26) + t3;
-    t3 = d & 0x3FFFFFFULL;
-    d = (d >> 26) + t4;
-    t4 = d & 0x3FFFFFFULL;
-    d = (d >> 26) + t5;
-    t5 = d & 0x3FFFFFFULL;
-    d = (d >> 26) + t6;
-    t6 = d & 0x3FFFFFFULL;
-    d = (d >> 26) + t7;
-    t7 = d & 0x3FFFFFFULL;
-    d = (d >> 26) + t8;
-    t8 = d & 0x3FFFFFFULL;
-    d = (d >> 26) + t9;
-    t9 = d & 0x03FFFFFULL;
-    assert((d >> 22) == 0);
-/*    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
-    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
-    fog("         tm2: ", r); */
-
-    // Subtract p if result >= p
-    uint64_t low = ((uint64_t)t1 << 26) | t0;
-    uint64_t mask = -(int64_t)((t9 < 0x03FFFFFUL) | (t8 < 0x3FFFFFFUL) | (t7 < 0x3FFFFFFUL) | (t6 < 0x3FFFFFFUL) | (t5 < 0x3FFFFFFUL) | (t4 < 0x3FFFFFFUL) | (t3 < 0x3FFFFFFUL) | (t2 < 0x3FFFFFFUL) | (low < 0xFFFFEFFFFFC2FULL));
-    t9 &= mask;
-    t8 &= mask;
-    t7 &= mask;
-    t6 &= mask;
-    t5 &= mask;
-    t4 &= mask;
-    t3 &= mask;
-    t2 &= mask;
-    low -= (~mask & 0xFFFFEFFFFFC2FULL);
-
-    // push internal variables back
-    r->n[0] = low & 0x3FFFFFFUL; r->n[1] = (low >> 26) & 0x3FFFFFFUL; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
-    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
-/*    fog("         out: ", r);*/
-
-#ifdef VERIFY
-    r->magnitude = 1;
-    r->normalized = 1;
-#endif
-}
-
-void static inline secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
-    r->n[0] = a;
-    r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
-#ifdef VERIFY
-    r->magnitude = 1;
-    r->normalized = 1;
-#endif
-}
-
-// TODO: not constant time!
-int static inline secp256k1_fe_is_zero(const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->normalized);
-#endif
-    return (a->n[0] == 0 && a->n[1] == 0 && a->n[2] == 0 && a->n[3] == 0 && a->n[4] == 0 && a->n[5] == 0 && a->n[6] == 0 && a->n[7] == 0 && a->n[8] == 0 && a->n[9] == 0);
-}
-
-int static inline secp256k1_fe_is_odd(const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->normalized);
-#endif
-    return a->n[0] & 1;
-}
-
-// TODO: not constant time!
-int static inline secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
-#ifdef VERIFY
-    assert(a->normalized);
-    assert(b->normalized);
-#endif
-    return (a->n[0] == b->n[0] && a->n[1] == b->n[1] && a->n[2] == b->n[2] && a->n[3] == b->n[3] && a->n[4] == b->n[4] &&
-            a->n[5] == b->n[5] && a->n[6] == b->n[6] && a->n[7] == b->n[7] && a->n[8] == b->n[8] && a->n[9] == b->n[9]);
-}
-
-void static secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
-    r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
-    r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
-    for (int i=0; i<32; i++) {
-        for (int j=0; j<4; j++) {
-            int limb = (8*i+2*j)/26;
-            int shift = (8*i+2*j)%26;
-            r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift;
-        }
-    }
-#ifdef VERIFY
-    r->magnitude = 1;
-    r->normalized = 1;
-#endif
-}
-
-/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
-void static secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->normalized);
-#endif
-    for (int i=0; i<32; i++) {
-        int c = 0;
-        for (int j=0; j<4; j++) {
-            int limb = (8*i+2*j)/26;
-            int shift = (8*i+2*j)%26;
-            c |= ((a->n[limb] >> shift) & 0x3) << (2 * j);
-        }
-        r[31-i] = c;
-    }
-}
-
-void static inline secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m) {
-#ifdef VERIFY
-    assert(a->magnitude <= m);
-    r->magnitude = m + 1;
-    r->normalized = 0;
-#endif
-    r->n[0] = 0x3FFFC2FUL * (m + 1) - a->n[0];
-    r->n[1] = 0x3FFFFBFUL * (m + 1) - a->n[1];
-    r->n[2] = 0x3FFFFFFUL * (m + 1) - a->n[2];
-    r->n[3] = 0x3FFFFFFUL * (m + 1) - a->n[3];
-    r->n[4] = 0x3FFFFFFUL * (m + 1) - a->n[4];
-    r->n[5] = 0x3FFFFFFUL * (m + 1) - a->n[5];
-    r->n[6] = 0x3FFFFFFUL * (m + 1) - a->n[6];
-    r->n[7] = 0x3FFFFFFUL * (m + 1) - a->n[7];
-    r->n[8] = 0x3FFFFFFUL * (m + 1) - a->n[8];
-    r->n[9] = 0x03FFFFFUL * (m + 1) - a->n[9];
-}
-
-void static inline secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) {
-#ifdef VERIFY
-    r->magnitude *= a;
-    r->normalized = 0;
-#endif
-    r->n[0] *= a;
-    r->n[1] *= a;
-    r->n[2] *= a;
-    r->n[3] *= a;
-    r->n[4] *= a;
-    r->n[5] *= a;
-    r->n[6] *= a;
-    r->n[7] *= a;
-    r->n[8] *= a;
-    r->n[9] *= a;
-}
-
-void static inline secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    r->magnitude += a->magnitude;
-    r->normalized = 0;
-#endif
-    r->n[0] += a->n[0];
-    r->n[1] += a->n[1];
-    r->n[2] += a->n[2];
-    r->n[3] += a->n[3];
-    r->n[4] += a->n[4];
-    r->n[5] += a->n[5];
-    r->n[6] += a->n[6];
-    r->n[7] += a->n[7];
-    r->n[8] += a->n[8];
-    r->n[9] += a->n[9];
-}
-
-void static inline secp256k1_fe_mul_inner(const uint32_t *a, const uint32_t *b, uint32_t *r) {
-    uint64_t c = (uint64_t)a[0] * b[0];
-    uint32_t t0 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[0] * b[1] +
-            (uint64_t)a[1] * b[0];
-    uint32_t t1 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[0] * b[2] +
-            (uint64_t)a[1] * b[1] +
-            (uint64_t)a[2] * b[0];
-    uint32_t t2 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[0] * b[3] +
-            (uint64_t)a[1] * b[2] +
-            (uint64_t)a[2] * b[1] +
-            (uint64_t)a[3] * b[0];
-    uint32_t t3 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[0] * b[4] +
-            (uint64_t)a[1] * b[3] +
-            (uint64_t)a[2] * b[2] +
-            (uint64_t)a[3] * b[1] +
-            (uint64_t)a[4] * b[0];
-    uint32_t t4 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[0] * b[5] +
-            (uint64_t)a[1] * b[4] +
-            (uint64_t)a[2] * b[3] +
-            (uint64_t)a[3] * b[2] +
-            (uint64_t)a[4] * b[1] +
-            (uint64_t)a[5] * b[0];
-    uint32_t t5 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[0] * b[6] +
-            (uint64_t)a[1] * b[5] +
-            (uint64_t)a[2] * b[4] +
-            (uint64_t)a[3] * b[3] +
-            (uint64_t)a[4] * b[2] +
-            (uint64_t)a[5] * b[1] +
-            (uint64_t)a[6] * b[0];
-    uint32_t t6 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[0] * b[7] +
-            (uint64_t)a[1] * b[6] +
-            (uint64_t)a[2] * b[5] +
-            (uint64_t)a[3] * b[4] +
-            (uint64_t)a[4] * b[3] +
-            (uint64_t)a[5] * b[2] +
-            (uint64_t)a[6] * b[1] +
-            (uint64_t)a[7] * b[0];
-    uint32_t t7 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[0] * b[8] +
-            (uint64_t)a[1] * b[7] +
-            (uint64_t)a[2] * b[6] +
-            (uint64_t)a[3] * b[5] +
-            (uint64_t)a[4] * b[4] +
-            (uint64_t)a[5] * b[3] +
-            (uint64_t)a[6] * b[2] +
-            (uint64_t)a[7] * b[1] +
-            (uint64_t)a[8] * b[0];
-    uint32_t t8 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[0] * b[9] +
-            (uint64_t)a[1] * b[8] +
-            (uint64_t)a[2] * b[7] +
-            (uint64_t)a[3] * b[6] +
-            (uint64_t)a[4] * b[5] +
-            (uint64_t)a[5] * b[4] +
-            (uint64_t)a[6] * b[3] +
-            (uint64_t)a[7] * b[2] +
-            (uint64_t)a[8] * b[1] +
-            (uint64_t)a[9] * b[0];
-    uint32_t t9 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[1] * b[9] +
-            (uint64_t)a[2] * b[8] +
-            (uint64_t)a[3] * b[7] +
-            (uint64_t)a[4] * b[6] +
-            (uint64_t)a[5] * b[5] +
-            (uint64_t)a[6] * b[4] +
-            (uint64_t)a[7] * b[3] +
-            (uint64_t)a[8] * b[2] +
-            (uint64_t)a[9] * b[1];
-    uint32_t t10 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[2] * b[9] +
-            (uint64_t)a[3] * b[8] +
-            (uint64_t)a[4] * b[7] +
-            (uint64_t)a[5] * b[6] +
-            (uint64_t)a[6] * b[5] +
-            (uint64_t)a[7] * b[4] +
-            (uint64_t)a[8] * b[3] +
-            (uint64_t)a[9] * b[2];
-    uint32_t t11 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[3] * b[9] +
-            (uint64_t)a[4] * b[8] +
-            (uint64_t)a[5] * b[7] +
-            (uint64_t)a[6] * b[6] +
-            (uint64_t)a[7] * b[5] +
-            (uint64_t)a[8] * b[4] +
-            (uint64_t)a[9] * b[3];
-    uint32_t t12 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[4] * b[9] +
-            (uint64_t)a[5] * b[8] +
-            (uint64_t)a[6] * b[7] +
-            (uint64_t)a[7] * b[6] +
-            (uint64_t)a[8] * b[5] +
-            (uint64_t)a[9] * b[4];
-    uint32_t t13 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[5] * b[9] +
-            (uint64_t)a[6] * b[8] +
-            (uint64_t)a[7] * b[7] +
-            (uint64_t)a[8] * b[6] +
-            (uint64_t)a[9] * b[5];
-    uint32_t t14 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[6] * b[9] +
-            (uint64_t)a[7] * b[8] +
-            (uint64_t)a[8] * b[7] +
-            (uint64_t)a[9] * b[6];
-    uint32_t t15 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[7] * b[9] +
-            (uint64_t)a[8] * b[8] +
-            (uint64_t)a[9] * b[7];
-    uint32_t t16 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[8] * b[9] +
-            (uint64_t)a[9] * b[8];
-    uint32_t t17 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[9] * b[9];
-    uint32_t t18 = c & 0x3FFFFFFUL; c = c >> 26;
-    uint32_t t19 = c;
-
-    c = t0 + (uint64_t)t10 * 0x3D10UL;
-    t0 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t1 + (uint64_t)t10*0x400UL + (uint64_t)t11 * 0x3D10UL;
-    t1 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t2 + (uint64_t)t11*0x400UL + (uint64_t)t12 * 0x3D10UL;
-    t2 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t3 + (uint64_t)t12*0x400UL + (uint64_t)t13 * 0x3D10UL;
-    r[3] = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t4 + (uint64_t)t13*0x400UL + (uint64_t)t14 * 0x3D10UL;
-    r[4] = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t5 + (uint64_t)t14*0x400UL + (uint64_t)t15 * 0x3D10UL;
-    r[5] = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t6 + (uint64_t)t15*0x400UL + (uint64_t)t16 * 0x3D10UL;
-    r[6] = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t7 + (uint64_t)t16*0x400UL + (uint64_t)t17 * 0x3D10UL;
-    r[7] = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t8 + (uint64_t)t17*0x400UL + (uint64_t)t18 * 0x3D10UL;
-    r[8] = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t9 + (uint64_t)t18*0x400UL + (uint64_t)t19 * 0x1000003D10ULL;
-    r[9] = c & 0x03FFFFFUL; c = c >> 22;
-    uint64_t d = t0 + c * 0x3D1UL;
-    r[0] = d & 0x3FFFFFFUL; d = d >> 26;
-    d = d + t1 + c*0x40;
-    r[1] = d & 0x3FFFFFFUL; d = d >> 26;
-    r[2] = t2 + d;
-}
-
-void static inline secp256k1_fe_sqr_inner(const uint32_t *a, uint32_t *r) {
-    uint64_t c = (uint64_t)a[0] * a[0];
-    uint32_t t0 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[0]*2) * a[1];
-    uint32_t t1 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[0]*2) * a[2] +
-            (uint64_t)a[1] * a[1];
-    uint32_t t2 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[0]*2) * a[3] +
-            (uint64_t)(a[1]*2) * a[2];
-    uint32_t t3 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[0]*2) * a[4] +
-            (uint64_t)(a[1]*2) * a[3] +
-            (uint64_t)a[2] * a[2];
-    uint32_t t4 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[0]*2) * a[5] +
-            (uint64_t)(a[1]*2) * a[4] +
-            (uint64_t)(a[2]*2) * a[3];
-    uint32_t t5 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[0]*2) * a[6] +
-            (uint64_t)(a[1]*2) * a[5] +
-            (uint64_t)(a[2]*2) * a[4] +
-            (uint64_t)a[3] * a[3];
-    uint32_t t6 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[0]*2) * a[7] +
-            (uint64_t)(a[1]*2) * a[6] +
-            (uint64_t)(a[2]*2) * a[5] +
-            (uint64_t)(a[3]*2) * a[4];
-    uint32_t t7 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[0]*2) * a[8] +
-            (uint64_t)(a[1]*2) * a[7] +
-            (uint64_t)(a[2]*2) * a[6] +
-            (uint64_t)(a[3]*2) * a[5] +
-            (uint64_t)a[4] * a[4];
-    uint32_t t8 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[0]*2) * a[9] +
-            (uint64_t)(a[1]*2) * a[8] +
-            (uint64_t)(a[2]*2) * a[7] +
-            (uint64_t)(a[3]*2) * a[6] +
-            (uint64_t)(a[4]*2) * a[5];
-    uint32_t t9 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[1]*2) * a[9] +
-            (uint64_t)(a[2]*2) * a[8] +
-            (uint64_t)(a[3]*2) * a[7] +
-            (uint64_t)(a[4]*2) * a[6] +
-            (uint64_t)a[5] * a[5];
-    uint32_t t10 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[2]*2) * a[9] +
-            (uint64_t)(a[3]*2) * a[8] +
-            (uint64_t)(a[4]*2) * a[7] +
-            (uint64_t)(a[5]*2) * a[6];
-    uint32_t t11 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[3]*2) * a[9] +
-            (uint64_t)(a[4]*2) * a[8] +
-            (uint64_t)(a[5]*2) * a[7] +
-            (uint64_t)a[6] * a[6];
-    uint32_t t12 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[4]*2) * a[9] +
-            (uint64_t)(a[5]*2) * a[8] +
-            (uint64_t)(a[6]*2) * a[7];
-    uint32_t t13 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[5]*2) * a[9] +
-            (uint64_t)(a[6]*2) * a[8] +
-            (uint64_t)a[7] * a[7];
-    uint32_t t14 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[6]*2) * a[9] +
-            (uint64_t)(a[7]*2) * a[8];
-    uint32_t t15 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[7]*2) * a[9] +
-            (uint64_t)a[8] * a[8];
-    uint32_t t16 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)(a[8]*2) * a[9];
-    uint32_t t17 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + (uint64_t)a[9] * a[9];
-    uint32_t t18 = c & 0x3FFFFFFUL; c = c >> 26;
-    uint32_t t19 = c;
-
-    c = t0 + (uint64_t)t10 * 0x3D10UL;
-    t0 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t1 + (uint64_t)t10*0x400UL + (uint64_t)t11 * 0x3D10UL;
-    t1 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t2 + (uint64_t)t11*0x400UL + (uint64_t)t12 * 0x3D10UL;
-    t2 = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t3 + (uint64_t)t12*0x400UL + (uint64_t)t13 * 0x3D10UL;
-    r[3] = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t4 + (uint64_t)t13*0x400UL + (uint64_t)t14 * 0x3D10UL;
-    r[4] = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t5 + (uint64_t)t14*0x400UL + (uint64_t)t15 * 0x3D10UL;
-    r[5] = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t6 + (uint64_t)t15*0x400UL + (uint64_t)t16 * 0x3D10UL;
-    r[6] = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t7 + (uint64_t)t16*0x400UL + (uint64_t)t17 * 0x3D10UL;
-    r[7] = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t8 + (uint64_t)t17*0x400UL + (uint64_t)t18 * 0x3D10UL;
-    r[8] = c & 0x3FFFFFFUL; c = c >> 26;
-    c = c + t9 + (uint64_t)t18*0x400UL + (uint64_t)t19 * 0x1000003D10ULL;
-    r[9] = c & 0x03FFFFFUL; c = c >> 22;
-    uint64_t d = t0 + c * 0x3D1UL;
-    r[0] = d & 0x3FFFFFFUL; d = d >> 26;
-    d = d + t1 + c*0x40;
-    r[1] = d & 0x3FFFFFFUL; d = d >> 26;
-    r[2] = t2 + d;
-}
-
-
-void static secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
-#ifdef VERIFY
-    assert(a->magnitude <= 8);
-    assert(b->magnitude <= 8);
-    r->magnitude = 1;
-    r->normalized = 0;
-#endif
-    secp256k1_fe_mul_inner(a->n, b->n, r->n);
-}
-
-void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->magnitude <= 8);
-    r->magnitude = 1;
-    r->normalized = 0;
-#endif
-    secp256k1_fe_sqr_inner(a->n, r->n);
-}
-
-#endif
diff --git a/secp256k1/impl/field_5x52.h b/secp256k1/impl/field_5x52.h
deleted file mode 100644
index 5347189f1..000000000
--- a/secp256k1/impl/field_5x52.h
+++ /dev/null
@@ -1,196 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_FIELD_REPR_IMPL_H_
-#define _SECP256K1_FIELD_REPR_IMPL_H_
-
-#include <assert.h>
-#include <string.h>
-#include "../num.h"
-#include "../field.h"
-
-#if defined(USE_FIELD_5X52_ASM)
-#include "field_5x52_asm.h"
-#elif defined(USE_FIELD_5X52_INT128)
-#include "field_5x52_int128.h"
-#else
-#error "Please select field_5x52 implementation"
-#endif
-
-/** Implements arithmetic modulo FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F,
- *  represented as 5 uint64_t's in base 2^52. The values are allowed to contain >52 each. In particular,
- *  each FieldElem has a 'magnitude' associated with it. Internally, a magnitude M means each element
- *  is at most M*(2^53-1), except the most significant one, which is limited to M*(2^49-1). All operations
- *  accept any input with magnitude at most M, and have different rules for propagating magnitude to their
- *  output.
- */
-
-void static secp256k1_fe_inner_start(void) {}
-void static secp256k1_fe_inner_stop(void) {}
-
-void static secp256k1_fe_normalize(secp256k1_fe_t *r) {
-    uint64_t c;
-    c = r->n[0];
-    uint64_t t0 = c & 0xFFFFFFFFFFFFFULL;
-    c = (c >> 52) + r->n[1];
-    uint64_t t1 = c & 0xFFFFFFFFFFFFFULL;
-    c = (c >> 52) + r->n[2];
-    uint64_t t2 = c & 0xFFFFFFFFFFFFFULL;
-    c = (c >> 52) + r->n[3];
-    uint64_t t3 = c & 0xFFFFFFFFFFFFFULL;
-    c = (c >> 52) + r->n[4];
-    uint64_t t4 = c & 0x0FFFFFFFFFFFFULL;
-    c >>= 48;
-
-    // The following code will not modify the t's if c is initially 0.
-    c = c * 0x1000003D1ULL + t0;
-    t0 = c & 0xFFFFFFFFFFFFFULL;
-    c = (c >> 52) + t1;
-    t1 = c & 0xFFFFFFFFFFFFFULL;
-    c = (c >> 52) + t2;
-    t2 = c & 0xFFFFFFFFFFFFFULL;
-    c = (c >> 52) + t3;
-    t3 = c & 0xFFFFFFFFFFFFFULL;
-    c = (c >> 52) + t4;
-    t4 = c & 0x0FFFFFFFFFFFFULL;
-    assert((c >> 48) == 0);
-
-    // Subtract p if result >= p
-    uint64_t mask = -(int64_t)((t4 < 0xFFFFFFFFFFFFULL) | (t3 < 0xFFFFFFFFFFFFFULL) | (t2 < 0xFFFFFFFFFFFFFULL) | (t1 < 0xFFFFFFFFFFFFFULL) | (t0 < 0xFFFFEFFFFFC2FULL));
-    t4 &= mask;
-    t3 &= mask;
-    t2 &= mask;
-    t1 &= mask;
-    t0 -= (~mask & 0xFFFFEFFFFFC2FULL);
-
-    // push internal variables back
-    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
-
-#ifdef VERIFY
-    r->magnitude = 1;
-    r->normalized = 1;
-#endif
-}
-
-void static inline secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
-    r->n[0] = a;
-    r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
-#ifdef VERIFY
-    r->magnitude = 1;
-    r->normalized = 1;
-#endif
-}
-
-// TODO: not constant time!
-int static inline secp256k1_fe_is_zero(const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->normalized);
-#endif
-    return (a->n[0] == 0 && a->n[1] == 0 && a->n[2] == 0 && a->n[3] == 0 && a->n[4] == 0);
-}
-
-int static inline secp256k1_fe_is_odd(const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->normalized);
-#endif
-    return a->n[0] & 1;
-}
-
-// TODO: not constant time!
-int static inline secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
-#ifdef VERIFY
-    assert(a->normalized);
-    assert(b->normalized);
-#endif
-    return (a->n[0] == b->n[0] && a->n[1] == b->n[1] && a->n[2] == b->n[2] && a->n[3] == b->n[3] && a->n[4] == b->n[4]);
-}
-
-void static secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
-    r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
-    for (int i=0; i<32; i++) {
-        for (int j=0; j<2; j++) {
-            int limb = (8*i+4*j)/52;
-            int shift = (8*i+4*j)%52;
-            r->n[limb] |= (uint64_t)((a[31-i] >> (4*j)) & 0xF) << shift;
-        }
-    }
-#ifdef VERIFY
-    r->magnitude = 1;
-    r->normalized = 1;
-#endif
-}
-
-/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
-void static secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->normalized);
-#endif
-    for (int i=0; i<32; i++) {
-        int c = 0;
-        for (int j=0; j<2; j++) {
-            int limb = (8*i+4*j)/52;
-            int shift = (8*i+4*j)%52;
-            c |= ((a->n[limb] >> shift) & 0xF) << (4 * j);
-        }
-        r[31-i] = c;
-    }
-}
-
-void static inline secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m) {
-#ifdef VERIFY
-    assert(a->magnitude <= m);
-    r->magnitude = m + 1;
-    r->normalized = 0;
-#endif
-    r->n[0] = 0xFFFFEFFFFFC2FULL * (m + 1) - a->n[0];
-    r->n[1] = 0xFFFFFFFFFFFFFULL * (m + 1) - a->n[1];
-    r->n[2] = 0xFFFFFFFFFFFFFULL * (m + 1) - a->n[2];
-    r->n[3] = 0xFFFFFFFFFFFFFULL * (m + 1) - a->n[3];
-    r->n[4] = 0x0FFFFFFFFFFFFULL * (m + 1) - a->n[4];
-}
-
-void static inline secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) {
-#ifdef VERIFY
-    r->magnitude *= a;
-    r->normalized = 0;
-#endif
-    r->n[0] *= a;
-    r->n[1] *= a;
-    r->n[2] *= a;
-    r->n[3] *= a;
-    r->n[4] *= a;
-}
-
-void static inline secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    r->magnitude += a->magnitude;
-    r->normalized = 0;
-#endif
-    r->n[0] += a->n[0];
-    r->n[1] += a->n[1];
-    r->n[2] += a->n[2];
-    r->n[3] += a->n[3];
-    r->n[4] += a->n[4];
-}
-
-void static secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
-#ifdef VERIFY
-    assert(a->magnitude <= 8);
-    assert(b->magnitude <= 8);
-    r->magnitude = 1;
-    r->normalized = 0;
-#endif
-    secp256k1_fe_mul_inner(a->n, b->n, r->n);
-}
-
-void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->magnitude <= 8);
-    r->magnitude = 1;
-    r->normalized = 0;
-#endif
-    secp256k1_fe_sqr_inner(a->n, r->n);
-}
-
-#endif
diff --git a/secp256k1/impl/field_5x52_asm.h b/secp256k1/impl/field_5x52_asm.h
deleted file mode 100644
index 93c6ab6b5..000000000
--- a/secp256k1/impl/field_5x52_asm.h
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_
-#define _SECP256K1_FIELD_INNER5X52_IMPL_H_
-
-void __attribute__ ((sysv_abi)) secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r);
-void __attribute__ ((sysv_abi)) secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r);
-
-#endif
diff --git a/secp256k1/impl/field_5x52_int128.h b/secp256k1/impl/field_5x52_int128.h
deleted file mode 100644
index 23cb13462..000000000
--- a/secp256k1/impl/field_5x52_int128.h
+++ /dev/null
@@ -1,105 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_
-#define _SECP256K1_FIELD_INNER5X52_IMPL_H_
-
-#include <stdint.h>
-
-void static inline secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r) {
-    __int128 c = (__int128)a[0] * b[0];
-    uint64_t t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0FFFFFFFFFFFFFE0
-    c = c + (__int128)a[0] * b[1] +
-            (__int128)a[1] * b[0];
-    uint64_t t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 20000000000000BF
-    c = c + (__int128)a[0] * b[2] +
-            (__int128)a[1] * b[1] +
-            (__int128)a[2] * b[0];
-    uint64_t t2 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 30000000000001A0
-    c = c + (__int128)a[0] * b[3] +
-            (__int128)a[1] * b[2] +
-            (__int128)a[2] * b[1] +
-            (__int128)a[3] * b[0];
-    uint64_t t3 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 4000000000000280
-    c = c + (__int128)a[0] * b[4] +
-            (__int128)a[1] * b[3] +
-            (__int128)a[2] * b[2] +
-            (__int128)a[3] * b[1] +
-            (__int128)a[4] * b[0];
-    uint64_t t4 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 320000000000037E
-    c = c + (__int128)a[1] * b[4] +
-            (__int128)a[2] * b[3] +
-            (__int128)a[3] * b[2] +
-            (__int128)a[4] * b[1];
-    uint64_t t5 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 22000000000002BE
-    c = c + (__int128)a[2] * b[4] +
-            (__int128)a[3] * b[3] +
-            (__int128)a[4] * b[2];
-    uint64_t t6 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 12000000000001DE
-    c = c + (__int128)a[3] * b[4] +
-            (__int128)a[4] * b[3];
-    uint64_t t7 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 02000000000000FE
-    c = c + (__int128)a[4] * b[4];
-    uint64_t t8 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 001000000000001E
-    uint64_t t9 = c;
-
-    c = t0 + (__int128)t5 * 0x1000003D10ULL;
-    t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
-    c = c + t1 + (__int128)t6 * 0x1000003D10ULL;
-    t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
-    c = c + t2 + (__int128)t7 * 0x1000003D10ULL;
-    r[2] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
-    c = c + t3 + (__int128)t8 * 0x1000003D10ULL;
-    r[3] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
-    c = c + t4 + (__int128)t9 * 0x1000003D10ULL;
-    r[4] = c & 0x0FFFFFFFFFFFFULL; c = c >> 48; // c max 000001000003D110
-    c = t0 + (__int128)c * 0x1000003D1ULL;
-    r[0] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 1000008
-    r[1] = t1 + c;
-
-}
-
-void static inline secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r) {
-    __int128 c = (__int128)a[0] * a[0];
-    uint64_t t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0FFFFFFFFFFFFFE0
-    c = c + (__int128)(a[0]*2) * a[1];
-    uint64_t t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 20000000000000BF
-    c = c + (__int128)(a[0]*2) * a[2] +
-            (__int128)a[1] * a[1];
-    uint64_t t2 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 30000000000001A0
-    c = c + (__int128)(a[0]*2) * a[3] +
-            (__int128)(a[1]*2) * a[2];
-    uint64_t t3 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 4000000000000280
-    c = c + (__int128)(a[0]*2) * a[4] +
-            (__int128)(a[1]*2) * a[3] +
-            (__int128)a[2] * a[2];
-    uint64_t t4 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 320000000000037E
-    c = c + (__int128)(a[1]*2) * a[4] +
-            (__int128)(a[2]*2) * a[3];
-    uint64_t t5 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 22000000000002BE
-    c = c + (__int128)(a[2]*2) * a[4] +
-            (__int128)a[3] * a[3];
-    uint64_t t6 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 12000000000001DE
-    c = c + (__int128)(a[3]*2) * a[4];
-    uint64_t t7 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 02000000000000FE
-    c = c + (__int128)a[4] * a[4];
-    uint64_t t8 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 001000000000001E
-    uint64_t t9 = c;
-    c = t0 + (__int128)t5 * 0x1000003D10ULL;
-    t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
-    c = c + t1 + (__int128)t6 * 0x1000003D10ULL;
-    t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
-    c = c + t2 + (__int128)t7 * 0x1000003D10ULL;
-    r[2] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
-    c = c + t3 + (__int128)t8 * 0x1000003D10ULL;
-    r[3] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
-    c = c + t4 + (__int128)t9 * 0x1000003D10ULL;
-    r[4] = c & 0x0FFFFFFFFFFFFULL; c = c >> 48; // c max 000001000003D110
-    c = t0 + (__int128)c * 0x1000003D1ULL;
-    r[0] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 1000008
-    r[1] = t1 + c;
-
-}
-
-#endif
diff --git a/secp256k1/impl/field_gmp.h b/secp256k1/impl/field_gmp.h
deleted file mode 100644
index 6172ef48e..000000000
--- a/secp256k1/impl/field_gmp.h
+++ /dev/null
@@ -1,155 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_FIELD_REPR_IMPL_H_
-#define _SECP256K1_FIELD_REPR_IMPL_H_
-
-#include <stdio.h>
-#include <assert.h>
-#include <string.h>
-#include "../num.h"
-#include "../field.h"
-
-static mp_limb_t secp256k1_field_p[FIELD_LIMBS];
-static mp_limb_t secp256k1_field_pc[(33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS];
-
-void static secp256k1_fe_inner_start(void) {
-    for (int i=0; i<(33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; i++)
-        secp256k1_field_pc[i] = 0;
-    secp256k1_field_pc[0] += 0x3D1UL;
-    secp256k1_field_pc[32/GMP_NUMB_BITS] += (1UL << (32 % GMP_NUMB_BITS));
-    for (int i=0; i<FIELD_LIMBS; i++) {
-        secp256k1_field_p[i] = 0;
-    }
-    mpn_sub(secp256k1_field_p, secp256k1_field_p, FIELD_LIMBS, secp256k1_field_pc, (33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS);
-}
-
-void static secp256k1_fe_inner_stop(void) {
-}
-
-void static secp256k1_fe_normalize(secp256k1_fe_t *r) {
-    if (r->n[FIELD_LIMBS] != 0) {
-#if (GMP_NUMB_BITS >= 40)
-        mp_limb_t carry = mpn_add_1(r->n, r->n, FIELD_LIMBS, 0x1000003D1ULL * r->n[FIELD_LIMBS]);
-        mpn_add_1(r->n, r->n, FIELD_LIMBS, 0x1000003D1ULL * carry);
-#else
-        mp_limb_t carry = mpn_add_1(r->n, r->n, FIELD_LIMBS, 0x3D1UL * r->n[FIELD_LIMBS]) + 
-                          mpn_add_1(r->n+(32/GMP_NUMB_BITS), r->n+(32/GMP_NUMB_BITS), FIELD_LIMBS-(32/GMP_NUMB_BITS), r->n[FIELD_LIMBS] << (32 % GMP_NUMB_BITS));
-        mpn_add_1(r->n, r->n, FIELD_LIMBS, 0x3D1UL * carry);
-        mpn_add_1(r->n+(32/GMP_NUMB_BITS), r->n+(32/GMP_NUMB_BITS), FIELD_LIMBS-(32/GMP_NUMB_BITS), carry << (32%GMP_NUMB_BITS));
-#endif
-        r->n[FIELD_LIMBS] = 0;
-    }
-    if (mpn_cmp(r->n, secp256k1_field_p, FIELD_LIMBS) >= 0)
-        mpn_sub(r->n, r->n, FIELD_LIMBS, secp256k1_field_p, FIELD_LIMBS);
-}
-
-void static inline secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
-    r->n[0] = a;
-    for (int i=1; i<FIELD_LIMBS+1; i++)
-        r->n[i] = 0;
-}
-
-int static inline secp256k1_fe_is_zero(const secp256k1_fe_t *a) {
-    int ret = 1;
-    for (int i=0; i<FIELD_LIMBS+1; i++)
-        ret &= (a->n[i] == 0);
-    return ret;
-}
-
-int static inline secp256k1_fe_is_odd(const secp256k1_fe_t *a) {
-    return a->n[0] & 1;
-}
-
-int static inline secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
-    int ret = 1;
-    for (int i=0; i<FIELD_LIMBS+1; i++)
-        ret &= (a->n[i] == b->n[i]);
-    return ret;
-}
-
-void static secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
-    for (int i=0; i<FIELD_LIMBS+1; i++)
-        r->n[i] = 0;
-    for (int i=0; i<256; i++) {
-        int limb = i/GMP_NUMB_BITS;
-        int shift = i%GMP_NUMB_BITS;
-        r->n[limb] |= (mp_limb_t)((a[31-i/8] >> (i%8)) & 0x1) << shift;
-    }
-}
-
-/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
-void static secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
-    for (int i=0; i<32; i++) {
-        int c = 0;
-        for (int j=0; j<8; j++) {
-            int limb = (8*i+j)/GMP_NUMB_BITS;
-            int shift = (8*i+j)%GMP_NUMB_BITS;
-            c |= ((a->n[limb] >> shift) & 0x1) << j;
-        }
-        r[31-i] = c;
-    }
-}
-
-void static inline secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m) {
-    *r = *a;
-    secp256k1_fe_normalize(r);
-    for (int i=0; i<FIELD_LIMBS; i++)
-        r->n[i] = ~(r->n[i]);
-#if (GMP_NUMB_BITS >= 33)
-    mpn_sub_1(r->n, r->n, FIELD_LIMBS, 0x1000003D0ULL);
-#else
-    mpn_sub_1(r->n, r->n, FIELD_LIMBS, 0x3D0UL);
-    mpn_sub_1(r->n+(32/GMP_NUMB_BITS), r->n+(32/GMP_NUMB_BITS), FIELD_LIMBS-(32/GMP_NUMB_BITS), 0x1UL << (32%GMP_NUMB_BITS));
-#endif
-}
-
-void static inline secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) {
-    mpn_mul_1(r->n, r->n, FIELD_LIMBS+1, a);
-}
-
-void static inline secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
-    mpn_add(r->n, r->n, FIELD_LIMBS+1, a->n, FIELD_LIMBS+1);
-}
-
-void static secp256k1_fe_reduce(secp256k1_fe_t *r, mp_limb_t *tmp) {
-    // <A1 A2 A3 A4> <B1 B2 B3 B4>
-    //       B1 B2 B3 B4
-    // + C * A1 A2 A3 A4
-    // +  A1 A2 A3 A4
-
-#if (GMP_NUMB_BITS >= 33)
-    mp_limb_t o = mpn_addmul_1(tmp, tmp+FIELD_LIMBS, FIELD_LIMBS, 0x1000003D1ULL);
-#else
-    mp_limb_t o = mpn_addmul_1(tmp, tmp+FIELD_LIMBS, FIELD_LIMBS, 0x3D1UL) +
-                  mpn_addmul_1(tmp+(32/GMP_NUMB_BITS), tmp+FIELD_LIMBS, FIELD_LIMBS-(32/GMP_NUMB_BITS), 0x1UL << (32%GMP_NUMB_BITS));
-#endif
-    mp_limb_t q[1+(33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS];
-    q[(33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS] = mpn_mul_1(q, secp256k1_field_pc, (33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS, o);
-#if (GMP_NUMB_BITS <= 32)
-    mp_limb_t o2 = tmp[2*FIELD_LIMBS-(32/GMP_NUMB_BITS)] << (32%GMP_NUMB_BITS);
-    q[(33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS] += mpn_addmul_1(q, secp256k1_field_pc, (33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS, o2);
-#endif
-    r->n[FIELD_LIMBS] = mpn_add(r->n, tmp, FIELD_LIMBS, q, 1+(33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS);
-}
-
-void static secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
-    secp256k1_fe_t ac = *a;
-    secp256k1_fe_t bc = *b;
-    secp256k1_fe_normalize(&ac);
-    secp256k1_fe_normalize(&bc);
-    mp_limb_t tmp[2*FIELD_LIMBS];
-    mpn_mul_n(tmp, ac.n, bc.n, FIELD_LIMBS);
-    secp256k1_fe_reduce(r, tmp);
-}
-
-void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
-    secp256k1_fe_t ac = *a;
-    secp256k1_fe_normalize(&ac);
-    mp_limb_t tmp[2*FIELD_LIMBS];
-    mpn_sqr(tmp, ac.n, FIELD_LIMBS);
-    secp256k1_fe_reduce(r, tmp);
-}
-
-#endif
diff --git a/secp256k1/impl/group.h b/secp256k1/impl/group.h
deleted file mode 100644
index cfbe80081..000000000
--- a/secp256k1/impl/group.h
+++ /dev/null
@@ -1,403 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_GROUP_IMPL_H_
-#define _SECP256K1_GROUP_IMPL_H_
-
-#include <string.h>
-
-#include "../num.h"
-#include "../field.h"
-#include "../group.h"
-
-void static secp256k1_ge_set_infinity(secp256k1_ge_t *r) {
-    r->infinity = 1;
-}
-
-void static secp256k1_ge_set_xy(secp256k1_ge_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y) {
-    r->infinity = 0;
-    r->x = *x;
-    r->y = *y;
-}
-
-int static secp256k1_ge_is_infinity(const secp256k1_ge_t *a) {
-    return a->infinity;
-}
-
-void static secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a) {
-    r->infinity = a->infinity;
-    r->x = a->x;
-    r->y = a->y;
-    secp256k1_fe_normalize(&r->y);
-    secp256k1_fe_negate(&r->y, &r->y, 1);
-}
-
-void static secp256k1_ge_get_hex(char *r, int *rlen, const secp256k1_ge_t *a) {
-    char cx[65]; int lx=65;
-    char cy[65]; int ly=65;
-    secp256k1_fe_get_hex(cx, &lx, &a->x);
-    secp256k1_fe_get_hex(cy, &ly, &a->y);
-    lx = strlen(cx);
-    ly = strlen(cy);
-    int len = lx + ly + 3 + 1;
-    if (*rlen < len) {
-        *rlen = len;
-        return;
-    }
-    *rlen = len;
-    r[0] = '(';
-    memcpy(r+1, cx, lx);
-    r[1+lx] = ',';
-    memcpy(r+2+lx, cy, ly);
-    r[2+lx+ly] = ')';
-    r[3+lx+ly] = 0;
-}
-
-void static secp256k1_ge_set_gej(secp256k1_ge_t *r, secp256k1_gej_t *a) {
-    secp256k1_fe_inv_var(&a->z, &a->z);
-    secp256k1_fe_t z2; secp256k1_fe_sqr(&z2, &a->z);
-    secp256k1_fe_t z3; secp256k1_fe_mul(&z3, &a->z, &z2);
-    secp256k1_fe_mul(&a->x, &a->x, &z2);
-    secp256k1_fe_mul(&a->y, &a->y, &z3);
-    secp256k1_fe_set_int(&a->z, 1);
-    r->infinity = a->infinity;
-    r->x = a->x;
-    r->y = a->y;
-}
-
-void static secp256k1_gej_set_infinity(secp256k1_gej_t *r) {
-    r->infinity = 1;
-}
-
-void static secp256k1_gej_set_xy(secp256k1_gej_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y) {
-    r->infinity = 0;
-    r->x = *x;
-    r->y = *y;
-    secp256k1_fe_set_int(&r->z, 1);
-}
-
-void static secp256k1_ge_set_xo(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd) {
-    r->x = *x;
-    secp256k1_fe_t x2; secp256k1_fe_sqr(&x2, x);
-    secp256k1_fe_t x3; secp256k1_fe_mul(&x3, x, &x2);
-    r->infinity = 0;
-    secp256k1_fe_t c; secp256k1_fe_set_int(&c, 7);
-    secp256k1_fe_add(&c, &x3);
-    secp256k1_fe_sqrt(&r->y, &c);
-    secp256k1_fe_normalize(&r->y);
-    if (secp256k1_fe_is_odd(&r->y) != odd)
-        secp256k1_fe_negate(&r->y, &r->y, 1);
-}
-
-void static secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a) {
-   r->infinity = a->infinity;
-   r->x = a->x;
-   r->y = a->y;
-   secp256k1_fe_set_int(&r->z, 1);
-}
-
-void static secp256k1_gej_get_x(secp256k1_fe_t *r, const secp256k1_gej_t *a) {
-    secp256k1_fe_t zi2; secp256k1_fe_inv_var(&zi2, &a->z); secp256k1_fe_sqr(&zi2, &zi2);
-    secp256k1_fe_mul(r, &a->x, &zi2);
-}
-
-void static secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a) {
-    r->infinity = a->infinity;
-    r->x = a->x;
-    r->y = a->y;
-    r->z = a->z;
-    secp256k1_fe_normalize(&r->y);
-    secp256k1_fe_negate(&r->y, &r->y, 1);
-}
-
-int static secp256k1_gej_is_infinity(const secp256k1_gej_t *a) {
-    return a->infinity;
-}
-
-int static secp256k1_gej_is_valid(const secp256k1_gej_t *a) {
-    if (a->infinity)
-        return 0;
-    // y^2 = x^3 + 7
-    // (Y/Z^3)^2 = (X/Z^2)^3 + 7
-    // Y^2 / Z^6 = X^3 / Z^6 + 7
-    // Y^2 = X^3 + 7*Z^6
-    secp256k1_fe_t y2; secp256k1_fe_sqr(&y2, &a->y);
-    secp256k1_fe_t x3; secp256k1_fe_sqr(&x3, &a->x); secp256k1_fe_mul(&x3, &x3, &a->x);
-    secp256k1_fe_t z2; secp256k1_fe_sqr(&z2, &a->z);
-    secp256k1_fe_t z6; secp256k1_fe_sqr(&z6, &z2); secp256k1_fe_mul(&z6, &z6, &z2);
-    secp256k1_fe_mul_int(&z6, 7);
-    secp256k1_fe_add(&x3, &z6);
-    secp256k1_fe_normalize(&y2);
-    secp256k1_fe_normalize(&x3);
-    return secp256k1_fe_equal(&y2, &x3);
-}
-
-int static secp256k1_ge_is_valid(const secp256k1_ge_t *a) {
-    if (a->infinity)
-        return 0;
-    // y^2 = x^3 + 7
-    secp256k1_fe_t y2; secp256k1_fe_sqr(&y2, &a->y);
-    secp256k1_fe_t x3; secp256k1_fe_sqr(&x3, &a->x); secp256k1_fe_mul(&x3, &x3, &a->x);
-    secp256k1_fe_t c; secp256k1_fe_set_int(&c, 7);
-    secp256k1_fe_add(&x3, &c);
-    secp256k1_fe_normalize(&y2);
-    secp256k1_fe_normalize(&x3);
-    return secp256k1_fe_equal(&y2, &x3);
-}
-
-void static secp256k1_gej_double(secp256k1_gej_t *r, const secp256k1_gej_t *a) {
-    secp256k1_fe_t t5 = a->y;
-    secp256k1_fe_normalize(&t5);
-    if (a->infinity || secp256k1_fe_is_zero(&t5)) {
-        r->infinity = 1;
-        return;
-    }
-
-    secp256k1_fe_t t1,t2,t3,t4;
-    secp256k1_fe_mul(&r->z, &t5, &a->z);
-    secp256k1_fe_mul_int(&r->z, 2);       // Z' = 2*Y*Z (2)
-    secp256k1_fe_sqr(&t1, &a->x);
-    secp256k1_fe_mul_int(&t1, 3);         // T1 = 3*X^2 (3)
-    secp256k1_fe_sqr(&t2, &t1);           // T2 = 9*X^4 (1)
-    secp256k1_fe_sqr(&t3, &t5);
-    secp256k1_fe_mul_int(&t3, 2);         // T3 = 2*Y^2 (2)
-    secp256k1_fe_sqr(&t4, &t3);
-    secp256k1_fe_mul_int(&t4, 2);         // T4 = 8*Y^4 (2)
-    secp256k1_fe_mul(&t3, &a->x, &t3);    // T3 = 2*X*Y^2 (1)
-    r->x = t3;
-    secp256k1_fe_mul_int(&r->x, 4);       // X' = 8*X*Y^2 (4)
-    secp256k1_fe_negate(&r->x, &r->x, 4); // X' = -8*X*Y^2 (5)
-    secp256k1_fe_add(&r->x, &t2);         // X' = 9*X^4 - 8*X*Y^2 (6)
-    secp256k1_fe_negate(&t2, &t2, 1);     // T2 = -9*X^4 (2)
-    secp256k1_fe_mul_int(&t3, 6);         // T3 = 12*X*Y^2 (6)
-    secp256k1_fe_add(&t3, &t2);           // T3 = 12*X*Y^2 - 9*X^4 (8)
-    secp256k1_fe_mul(&r->y, &t1, &t3);    // Y' = 36*X^3*Y^2 - 27*X^6 (1)
-    secp256k1_fe_negate(&t2, &t4, 2);     // T2 = -8*Y^4 (3)
-    secp256k1_fe_add(&r->y, &t2);         // Y' = 36*X^3*Y^2 - 27*X^6 - 8*Y^4 (4)
-    r->infinity = 0;
-}
-
-void static secp256k1_gej_add(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b) {
-    if (a->infinity) {
-        *r = *b;
-        return;
-    }
-    if (b->infinity) {
-        *r = *a;
-        return;
-    }
-    r->infinity = 0;
-    secp256k1_fe_t z22; secp256k1_fe_sqr(&z22, &b->z);
-    secp256k1_fe_t z12; secp256k1_fe_sqr(&z12, &a->z);
-    secp256k1_fe_t u1; secp256k1_fe_mul(&u1, &a->x, &z22);
-    secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &z12);
-    secp256k1_fe_t s1; secp256k1_fe_mul(&s1, &a->y, &z22); secp256k1_fe_mul(&s1, &s1, &b->z);
-    secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z);
-    secp256k1_fe_normalize(&u1);
-    secp256k1_fe_normalize(&u2);
-    if (secp256k1_fe_equal(&u1, &u2)) {
-        secp256k1_fe_normalize(&s1);
-        secp256k1_fe_normalize(&s2);
-        if (secp256k1_fe_equal(&s1, &s2)) {
-            secp256k1_gej_double(r, a);
-        } else {
-            r->infinity = 1;
-        }
-        return;
-    }
-    secp256k1_fe_t h; secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2);
-    secp256k1_fe_t i; secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2);
-    secp256k1_fe_t i2; secp256k1_fe_sqr(&i2, &i);
-    secp256k1_fe_t h2; secp256k1_fe_sqr(&h2, &h);
-    secp256k1_fe_t h3; secp256k1_fe_mul(&h3, &h, &h2);
-    secp256k1_fe_mul(&r->z, &a->z, &b->z); secp256k1_fe_mul(&r->z, &r->z, &h);
-    secp256k1_fe_t t; secp256k1_fe_mul(&t, &u1, &h2);
-    r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2);
-    secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i);
-    secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1);
-    secp256k1_fe_add(&r->y, &h3);
-}
-
-void static secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b) {
-    if (a->infinity) {
-        r->infinity = b->infinity;
-        r->x = b->x;
-        r->y = b->y;
-        secp256k1_fe_set_int(&r->z, 1);
-        return;
-    }
-    if (b->infinity) {
-        *r = *a;
-        return;
-    }
-    r->infinity = 0;
-    secp256k1_fe_t z12; secp256k1_fe_sqr(&z12, &a->z);
-    secp256k1_fe_t u1 = a->x; secp256k1_fe_normalize(&u1);
-    secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &z12);
-    secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize(&s1);
-    secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z);
-    secp256k1_fe_normalize(&u1);
-    secp256k1_fe_normalize(&u2);
-    if (secp256k1_fe_equal(&u1, &u2)) {
-        secp256k1_fe_normalize(&s1);
-        secp256k1_fe_normalize(&s2);
-        if (secp256k1_fe_equal(&s1, &s2)) {
-            secp256k1_gej_double(r, a);
-        } else {
-            r->infinity = 1;
-        }
-        return;
-    }
-    secp256k1_fe_t h; secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2);
-    secp256k1_fe_t i; secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2);
-    secp256k1_fe_t i2; secp256k1_fe_sqr(&i2, &i);
-    secp256k1_fe_t h2; secp256k1_fe_sqr(&h2, &h);
-    secp256k1_fe_t h3; secp256k1_fe_mul(&h3, &h, &h2);
-    r->z = a->z; secp256k1_fe_mul(&r->z, &r->z, &h);
-    secp256k1_fe_t t; secp256k1_fe_mul(&t, &u1, &h2);
-    r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2);
-    secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i);
-    secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1);
-    secp256k1_fe_add(&r->y, &h3);
-}
-
-void static secp256k1_gej_get_hex(char *r, int *rlen, const secp256k1_gej_t *a) {
-    secp256k1_gej_t c = *a;
-    secp256k1_ge_t t; secp256k1_ge_set_gej(&t, &c);
-    secp256k1_ge_get_hex(r, rlen, &t);
-}
-
-#ifdef USE_ENDOMORPHISM
-void static secp256k1_gej_mul_lambda(secp256k1_gej_t *r, const secp256k1_gej_t *a) {
-    const secp256k1_fe_t *beta = &secp256k1_ge_consts->beta;
-    *r = *a;
-    secp256k1_fe_mul(&r->x, &r->x, beta);
-}
-
-void static secp256k1_gej_split_exp(secp256k1_num_t *r1, secp256k1_num_t *r2, const secp256k1_num_t *a) {
-    const secp256k1_ge_consts_t *c = secp256k1_ge_consts;
-    secp256k1_num_t bnc1, bnc2, bnt1, bnt2, bnn2;
-
-    secp256k1_num_init(&bnc1);
-    secp256k1_num_init(&bnc2);
-    secp256k1_num_init(&bnt1);
-    secp256k1_num_init(&bnt2);
-    secp256k1_num_init(&bnn2);
-
-    secp256k1_num_copy(&bnn2, &c->order);
-    secp256k1_num_shift(&bnn2, 1);
-
-    secp256k1_num_mul(&bnc1, a, &c->a1b2);
-    secp256k1_num_add(&bnc1, &bnc1, &bnn2);
-    secp256k1_num_div(&bnc1, &bnc1, &c->order);
-
-    secp256k1_num_mul(&bnc2, a, &c->b1);
-    secp256k1_num_add(&bnc2, &bnc2, &bnn2);
-    secp256k1_num_div(&bnc2, &bnc2, &c->order);
-
-    secp256k1_num_mul(&bnt1, &bnc1, &c->a1b2);
-    secp256k1_num_mul(&bnt2, &bnc2, &c->a2);
-    secp256k1_num_add(&bnt1, &bnt1, &bnt2);
-    secp256k1_num_sub(r1, a, &bnt1);
-    secp256k1_num_mul(&bnt1, &bnc1, &c->b1);
-    secp256k1_num_mul(&bnt2, &bnc2, &c->a1b2);
-    secp256k1_num_sub(r2, &bnt1, &bnt2);
-
-    secp256k1_num_free(&bnc1);
-    secp256k1_num_free(&bnc2);
-    secp256k1_num_free(&bnt1);
-    secp256k1_num_free(&bnt2);
-    secp256k1_num_free(&bnn2);
-}
-#endif
-
-
-void static secp256k1_ge_start(void) {
-    static const unsigned char secp256k1_ge_consts_order[] = {
-        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,
-        0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,
-        0xBF,0xD2,0x5E,0x8C,0xD0,0x36,0x41,0x41
-    };
-    static const unsigned char secp256k1_ge_consts_g_x[] = {
-        0x79,0xBE,0x66,0x7E,0xF9,0xDC,0xBB,0xAC,
-        0x55,0xA0,0x62,0x95,0xCE,0x87,0x0B,0x07,
-        0x02,0x9B,0xFC,0xDB,0x2D,0xCE,0x28,0xD9,
-        0x59,0xF2,0x81,0x5B,0x16,0xF8,0x17,0x98
-    };
-    static const unsigned char secp256k1_ge_consts_g_y[] = {
-        0x48,0x3A,0xDA,0x77,0x26,0xA3,0xC4,0x65,
-        0x5D,0xA4,0xFB,0xFC,0x0E,0x11,0x08,0xA8,
-        0xFD,0x17,0xB4,0x48,0xA6,0x85,0x54,0x19,
-        0x9C,0x47,0xD0,0x8F,0xFB,0x10,0xD4,0xB8
-    };
-#ifdef USE_ENDOMORPHISM
-    // properties of secp256k1's efficiently computable endomorphism
-    static const unsigned char secp256k1_ge_consts_lambda[] = {
-        0x53,0x63,0xad,0x4c,0xc0,0x5c,0x30,0xe0,
-        0xa5,0x26,0x1c,0x02,0x88,0x12,0x64,0x5a,
-        0x12,0x2e,0x22,0xea,0x20,0x81,0x66,0x78,
-        0xdf,0x02,0x96,0x7c,0x1b,0x23,0xbd,0x72
-    };
-    static const unsigned char secp256k1_ge_consts_beta[] = {
-        0x7a,0xe9,0x6a,0x2b,0x65,0x7c,0x07,0x10,
-        0x6e,0x64,0x47,0x9e,0xac,0x34,0x34,0xe9,
-        0x9c,0xf0,0x49,0x75,0x12,0xf5,0x89,0x95,
-        0xc1,0x39,0x6c,0x28,0x71,0x95,0x01,0xee
-    };
-    static const unsigned char secp256k1_ge_consts_a1b2[] = {
-        0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,
-        0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15
-    };
-    static const unsigned char secp256k1_ge_consts_b1[] = {
-        0xe4,0x43,0x7e,0xd6,0x01,0x0e,0x88,0x28,
-        0x6f,0x54,0x7f,0xa9,0x0a,0xbf,0xe4,0xc3
-    };
-    static const unsigned char secp256k1_ge_consts_a2[] = {
-        0x01,
-        0x14,0xca,0x50,0xf7,0xa8,0xe2,0xf3,0xf6,
-        0x57,0xc1,0x10,0x8d,0x9d,0x44,0xcf,0xd8
-    };
-#endif
-    if (secp256k1_ge_consts == NULL) {
-        secp256k1_ge_consts_t *ret = (secp256k1_ge_consts_t*)malloc(sizeof(secp256k1_ge_consts_t));
-        secp256k1_num_init(&ret->order);
-        secp256k1_num_init(&ret->half_order);
-        secp256k1_num_set_bin(&ret->order,  secp256k1_ge_consts_order,  sizeof(secp256k1_ge_consts_order));
-        secp256k1_num_copy(&ret->half_order, &ret->order);
-        secp256k1_num_shift(&ret->half_order, 1);
-#ifdef USE_ENDOMORPHISM
-        secp256k1_num_init(&ret->lambda);
-        secp256k1_num_init(&ret->a1b2);
-        secp256k1_num_init(&ret->a2);
-        secp256k1_num_init(&ret->b1);
-        secp256k1_num_set_bin(&ret->lambda, secp256k1_ge_consts_lambda, sizeof(secp256k1_ge_consts_lambda));
-        secp256k1_num_set_bin(&ret->a1b2,   secp256k1_ge_consts_a1b2,   sizeof(secp256k1_ge_consts_a1b2));
-        secp256k1_num_set_bin(&ret->a2,     secp256k1_ge_consts_a2,     sizeof(secp256k1_ge_consts_a2));
-        secp256k1_num_set_bin(&ret->b1,     secp256k1_ge_consts_b1,     sizeof(secp256k1_ge_consts_b1));
-        secp256k1_fe_set_b32(&ret->beta, secp256k1_ge_consts_beta);
-#endif
-        secp256k1_fe_t g_x, g_y;
-        secp256k1_fe_set_b32(&g_x, secp256k1_ge_consts_g_x);
-        secp256k1_fe_set_b32(&g_y, secp256k1_ge_consts_g_y);
-        secp256k1_ge_set_xy(&ret->g, &g_x, &g_y);
-        secp256k1_ge_consts = ret;
-    }
-}
-
-void static secp256k1_ge_stop(void) {
-    if (secp256k1_ge_consts != NULL) {
-        secp256k1_ge_consts_t *c = (secp256k1_ge_consts_t*)secp256k1_ge_consts;
-        secp256k1_num_free(&c->order);
-        secp256k1_num_free(&c->half_order);
-        secp256k1_num_free(&c->lambda);
-        secp256k1_num_free(&c->a1b2);
-        secp256k1_num_free(&c->a2);
-        secp256k1_num_free(&c->b1);
-        free((void*)c);
-        secp256k1_ge_consts = NULL;
-    }
-}
-
-#endif
diff --git a/secp256k1/impl/num.h b/secp256k1/impl/num.h
deleted file mode 100644
index 21e3390b5..000000000
--- a/secp256k1/impl/num.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_NUM_IMPL_H_
-#define _SECP256K1_NUM_IMPL_H_
-
-#include "../num.h"
-
-#if defined(USE_NUM_GMP)
-#include "num_gmp.h"
-#elif defined(USE_NUM_OPENSSL)
-#include "num_openssl.h"
-#elif defined(USE_NUM_BOOST)
-#include "num_boost.h"
-#else
-#error "Please select num implementation"
-#endif
-
-#endif
diff --git a/secp256k1/impl/num_boost.h b/secp256k1/impl/num_boost.h
deleted file mode 100644
index b808e3214..000000000
--- a/secp256k1/impl/num_boost.h
+++ /dev/null
@@ -1,212 +0,0 @@
-// Copyright (c) 2014 Tim Hughes
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_NUM_REPR_IMPL_H_
-#define _SECP256K1_NUM_REPR_IMPL_H_
-#include <assert.h>
-#include <boost/math/common_factor.hpp>
-
-void static secp256k1_num_init(secp256k1_num_t *r)
-{
-	*r = 0;
-}
-
-void static secp256k1_num_free(secp256k1_num_t*)
-{
-}
-
-void static secp256k1_num_copy(secp256k1_num_t *r, const secp256k1_num_t *a)
-{
-	*r = *a;
-}
-
-int static secp256k1_num_bits(const secp256k1_num_t *a)
-{
-	int numLimbs = a->backend().size();
-    int ret = (numLimbs - 1) * a->backend().limb_bits;
-    for (auto x = a->backend().limbs()[numLimbs - 1]; x; x >>= 1, ++ret);
-    return ret;
-}
-
-void static secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const secp256k1_num_t *a)
-{
-	for (auto n = abs(*a); n; n >>= 8)
-	{
-		assert(rlen > 0); // out of space?
-		r[--rlen] = n.convert_to<unsigned char>();
-	}
-	memset(r, 0, rlen);
-}
-
-void static secp256k1_num_set_bin(secp256k1_num_t *r, const unsigned char *a, unsigned int alen)
-{
-	*r = 0;
-	for (unsigned int i = 0; i != alen; ++i)
-	{
-		*r <<= 8;
-		*r |= a[i];
-	}
-}
-
-void static secp256k1_num_set_int(secp256k1_num_t *r, int a)
-{
-    *r = a;
-}
-
-void static secp256k1_num_mod(secp256k1_num_t *r, const secp256k1_num_t *m)
-{
-	*r %= *m;
-}
-
-void static secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t *n, const secp256k1_num_t *m)
-{
-	// http://rosettacode.org/wiki/Modular_inverse
-	secp256k1_num_t a = *n;
-	secp256k1_num_t b = *m;
-	secp256k1_num_t x0 = 0;
-	secp256k1_num_t x1 = 1;
-	assert(*n > 0);
-	assert(*m > 0);
-	if (b != 1)
-	{
-		secp256k1_num_t q, t;
-		while (a > 1)
-		{
-			boost::multiprecision::divide_qr(a, b, q, t);
-			a = b; b = t;
-
-			t = x1 - q * x0;
-			x1 = x0; x0 = t;
-		}
-		if (x1 < 0)
-		{
-			x1 += *m;
-		}
-	}
-	*r = x1;
-
-	// check result
-	#ifdef _DEBUG
-	{
-		typedef boost::multiprecision::number<boost::multiprecision::cpp_int_backend<512, 512, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>> bignum;
-		bignum br = *r, bn = *n, bm = *m;
-		assert((((bn) * (br)) % bm) == 1);
-	}
-	#endif
-}
-
-int static secp256k1_num_is_zero(const secp256k1_num_t *a)
-{
-    return a->is_zero();
-}
-
-int static secp256k1_num_is_odd(const secp256k1_num_t *a)
-{
-    return boost::multiprecision::bit_test(*a, 0);
-}
-
-int static secp256k1_num_is_neg(const secp256k1_num_t *a)
-{
-	return a->backend().isneg();
-}
-
-int static secp256k1_num_cmp(const secp256k1_num_t *a, const secp256k1_num_t *b)
-{
-	return a->backend().compare_unsigned(b->backend());
-}
-
-void static secp256k1_num_add(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b)
-{
-	*r = (*a) + (*b);
-}
-
-void static secp256k1_num_sub(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b)
-{
-	*r = (*a) - (*b);
-}
-
-void static secp256k1_num_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b)
-{
-	*r = (*a) * (*b);
-}
-
-void static secp256k1_num_div(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b)
-{
-	*r = (*a) / (*b);
-}
-
-void static secp256k1_num_mod_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b, const secp256k1_num_t *m)
-{
-    secp256k1_num_mul(r, a, b);
-    secp256k1_num_mod(r, m);
-}
-
-int static secp256k1_num_shift(secp256k1_num_t *r, int bits)
-{
-	unsigned ret = r->convert_to<unsigned>() & ((1 << bits) - 1);
-	*r >>= bits;
-	return ret;
-}
-
-int static secp256k1_num_get_bit(const secp256k1_num_t *a, int pos)
-{
-	return boost::multiprecision::bit_test(*a, pos);
-}
-
-void static secp256k1_num_inc(secp256k1_num_t *r)
-{
-	++*r;
-}
-
-void static secp256k1_num_set_hex(secp256k1_num_t *r, const char *a, int alen)
-{
-    static const unsigned char cvt[256] = {
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 1, 2, 3, 4, 5, 6,7,8,9,0,0,0,0,0,0,
-        0,10,11,12,13,14,15,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0,10,11,12,13,14,15,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0
-    };
-	*r = 0;
-	for (int i = 0; i != alen; ++i)
-	{
-		*r <<= 4;
-		*r |= cvt[a[i]];
-	}
-}
-
-void static secp256k1_num_get_hex(char *r, int rlen, const secp256k1_num_t *a)
-{
-    static const unsigned char cvt[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
-	for (auto n = *a; n; n >>= 4)
-	{
-		assert(rlen > 0); // out of space?
-		r[--rlen] = cvt[n.convert_to<unsigned char>() & 15];
-	}
-	memset(r, '0', rlen);
-}
-
-void static secp256k1_num_split(secp256k1_num_t *rl, secp256k1_num_t *rh, const secp256k1_num_t *a, int bits)
-{
-	*rl = *a & ((secp256k1_num_t(1) << bits) - 1);
-	*rh = *a >> bits;
-}
-
-void static secp256k1_num_negate(secp256k1_num_t *r)
-{
-	r->backend().negate();
-}
-
-#endif
diff --git a/secp256k1/impl/num_gmp.h b/secp256k1/impl/num_gmp.h
deleted file mode 100644
index 067c15180..000000000
--- a/secp256k1/impl/num_gmp.h
+++ /dev/null
@@ -1,346 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_NUM_REPR_IMPL_H_
-#define _SECP256K1_NUM_REPR_IMPL_H_
-
-#include <assert.h>
-#include <string.h>
-#include <stdlib.h>
-#include <gmp.h>
-
-#include "num.h"
-
-#ifdef VERIFY
-void static secp256k1_num_sanity(const secp256k1_num_t *a) {
-    assert(a->limbs == 1 || (a->limbs > 1 && a->data[a->limbs-1] != 0));
-}
-#else
-#define secp256k1_num_sanity(a) do { } while(0)
-#endif
-
-void static secp256k1_num_init(secp256k1_num_t *r) {
-    r->neg = 0;
-    r->limbs = 1;
-    r->data[0] = 0;
-}
-
-void static secp256k1_num_free(secp256k1_num_t *r) {
-}
-
-void static secp256k1_num_copy(secp256k1_num_t *r, const secp256k1_num_t *a) {
-    *r = *a;
-}
-
-int static secp256k1_num_bits(const secp256k1_num_t *a) {
-    int ret=(a->limbs-1)*GMP_NUMB_BITS;
-    mp_limb_t x=a->data[a->limbs-1];
-    while (x) {
-        x >>= 1;
-        ret++;
-    }
-    return ret;
-}
-
-
-void static secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const secp256k1_num_t *a) {
-    unsigned char tmp[65];
-    int len = 0;
-    if (a->limbs>1 || a->data[0] != 0) {
-        len = mpn_get_str(tmp, 256, (mp_limb_t*)a->data, a->limbs);
-    }
-    int shift = 0;
-    while (shift < len && tmp[shift] == 0) shift++;
-    assert(len-shift <= rlen);
-    memset(r, 0, rlen - len + shift);
-    if (len > shift)
-        memcpy(r + rlen - len + shift, tmp + shift, len - shift);
-}
-
-void static secp256k1_num_set_bin(secp256k1_num_t *r, const unsigned char *a, unsigned int alen) {
-    assert(alen > 0);
-    assert(alen <= 64);
-    int len = mpn_set_str(r->data, a, alen, 256);
-    assert(len <= NUM_LIMBS*2);
-    r->limbs = len;
-    r->neg = 0;
-    while (r->limbs > 1 && r->data[r->limbs-1]==0) r->limbs--;
-}
-
-void static secp256k1_num_set_int(secp256k1_num_t *r, int a) {
-    r->limbs = 1;
-    r->neg = (a < 0);
-    r->data[0] = (a < 0) ? -a : a;
-}
-
-void static secp256k1_num_add_abs(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
-    mp_limb_t c = mpn_add(r->data, a->data, a->limbs, b->data, b->limbs);
-    r->limbs = a->limbs;
-    if (c != 0) {
-        assert(r->limbs < 2*NUM_LIMBS);
-        r->data[r->limbs++] = c;
-    }
-}
-
-void static secp256k1_num_sub_abs(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
-    mp_limb_t c = mpn_sub(r->data, a->data, a->limbs, b->data, b->limbs);
-    assert(c == 0);
-    r->limbs = a->limbs;
-    while (r->limbs > 1 && r->data[r->limbs-1]==0) r->limbs--;
-}
-
-void static secp256k1_num_mod(secp256k1_num_t *r, const secp256k1_num_t *m) {
-    secp256k1_num_sanity(r);
-    secp256k1_num_sanity(m);
-
-    if (r->limbs >= m->limbs) {
-        mp_limb_t t[2*NUM_LIMBS];
-        mpn_tdiv_qr(t, r->data, 0, r->data, r->limbs, m->data, m->limbs);
-        r->limbs = m->limbs;
-        while (r->limbs > 1 && r->data[r->limbs-1]==0) r->limbs--;
-    }
-
-    if (r->neg && (r->limbs > 1 || r->data[0] != 0)) {
-        secp256k1_num_sub_abs(r, m, r);
-        r->neg = 0;
-    }
-}
-
-void static secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *m) {
-    secp256k1_num_sanity(a);
-    secp256k1_num_sanity(m);
-
-    // mpn_gcdext computes: (G,S) = gcdext(U,V), where
-    // * G = gcd(U,V)
-    // * G = U*S + V*T
-    // * U has equal or more limbs than V, and V has no padding
-    // If we set U to be (a padded version of) a, and V = m:
-    //   G = a*S + m*T
-    //   G = a*S mod m
-    // Assuming G=1:
-    //   S = 1/a mod m
-    assert(m->limbs <= NUM_LIMBS);
-    assert(m->data[m->limbs-1] != 0);
-    mp_limb_t g[NUM_LIMBS+1];
-    mp_limb_t u[NUM_LIMBS+1];
-    mp_limb_t v[NUM_LIMBS+1];
-    for (int i=0; i < m->limbs; i++) {
-        u[i] = (i < a->limbs) ? a->data[i] : 0;
-        v[i] = m->data[i];
-    }
-    mp_size_t sn = NUM_LIMBS+1;
-    mp_size_t gn = mpn_gcdext(g, r->data, &sn, u, m->limbs, v, m->limbs);
-    assert(gn == 1);
-    assert(g[0] == 1);
-    r->neg = a->neg ^ m->neg;
-    if (sn < 0) {
-        mpn_sub(r->data, m->data, m->limbs, r->data, -sn);
-        r->limbs = m->limbs;
-        while (r->limbs > 1 && r->data[r->limbs-1]==0) r->limbs--;
-    } else {
-        r->limbs = sn;
-    }
-}
-
-int static secp256k1_num_is_zero(const secp256k1_num_t *a) {
-    return (a->limbs == 1 && a->data[0] == 0);
-}
-
-int static secp256k1_num_is_odd(const secp256k1_num_t *a) {
-    return a->data[0] & 1;
-}
-
-int static secp256k1_num_is_neg(const secp256k1_num_t *a) {
-    return (a->limbs > 1 || a->data[0] != 0) && a->neg;
-}
-
-int static secp256k1_num_cmp(const secp256k1_num_t *a, const secp256k1_num_t *b) {
-    if (a->limbs > b->limbs) return 1;
-    if (a->limbs < b->limbs) return -1;
-    return mpn_cmp(a->data, b->data, a->limbs);
-}
-
-void static secp256k1_num_subadd(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b, int bneg) {
-    if (!(b->neg ^ bneg ^ a->neg)) { // a and b have the same sign
-        r->neg = a->neg;
-        if (a->limbs >= b->limbs) {
-            secp256k1_num_add_abs(r, a, b);
-        } else {
-            secp256k1_num_add_abs(r, b, a);
-        }
-    } else {
-        if (secp256k1_num_cmp(a, b) > 0) {
-            r->neg = a->neg;
-            secp256k1_num_sub_abs(r, a, b);
-        } else {
-            r->neg = b->neg ^ bneg;
-            secp256k1_num_sub_abs(r, b, a);
-        }
-    }
-}
-
-void static secp256k1_num_add(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
-    secp256k1_num_sanity(a);
-    secp256k1_num_sanity(b);
-    secp256k1_num_subadd(r, a, b, 0);
-}
-
-void static secp256k1_num_sub(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
-    secp256k1_num_sanity(a);
-    secp256k1_num_sanity(b);
-    secp256k1_num_subadd(r, a, b, 1);
-}
-
-void static secp256k1_num_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
-    secp256k1_num_sanity(a);
-    secp256k1_num_sanity(b);
-
-    mp_limb_t tmp[2*NUM_LIMBS+1];
-    assert(a->limbs + b->limbs <= 2*NUM_LIMBS+1);
-    if ((a->limbs==1 && a->data[0]==0) || (b->limbs==1 && b->data[0]==0)) {
-        r->limbs = 1;
-        r->neg = 0;
-        r->data[0] = 0;
-        return;
-    }
-    if (a->limbs >= b->limbs)
-        mpn_mul(tmp, a->data, a->limbs, b->data, b->limbs);
-    else
-        mpn_mul(tmp, b->data, b->limbs, a->data, a->limbs);
-    r->limbs = a->limbs + b->limbs;
-    if (r->limbs > 1 && tmp[r->limbs - 1]==0) r->limbs--;
-    assert(r->limbs <= 2*NUM_LIMBS);
-    mpn_copyi(r->data, tmp, r->limbs);
-    r->neg = a->neg ^ b->neg;
-}
-
-void static secp256k1_num_div(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
-    secp256k1_num_sanity(a);
-    secp256k1_num_sanity(b);
-    if (b->limbs > a->limbs) {
-        r->limbs = 1;
-        r->data[0] = 0;
-        r->neg = 0;
-        return;
-    }
-
-    mp_limb_t quo[2*NUM_LIMBS+1];
-    mp_limb_t rem[2*NUM_LIMBS+1];
-    mpn_tdiv_qr(quo, rem, 0, a->data, a->limbs, b->data, b->limbs);
-    mpn_copyi(r->data, quo, a->limbs - b->limbs + 1);
-    r->limbs = a->limbs - b->limbs + 1;
-    while (r->limbs > 1 && r->data[r->limbs - 1]==0) r->limbs--;
-    r->neg = a->neg ^ b->neg;
-}
-
-void static secp256k1_num_mod_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b, const secp256k1_num_t *m) {
-    secp256k1_num_mul(r, a, b);
-    secp256k1_num_mod(r, m);
-}
-
-
-int static secp256k1_num_shift(secp256k1_num_t *r, int bits) {
-    assert(bits <= GMP_NUMB_BITS);
-    mp_limb_t ret = mpn_rshift(r->data, r->data, r->limbs, bits);
-    if (r->limbs>1 && r->data[r->limbs-1]==0) r->limbs--;
-    ret >>= (GMP_NUMB_BITS - bits);
-    return ret;
-}
-
-int static secp256k1_num_get_bit(const secp256k1_num_t *a, int pos) {
-    return (a->limbs*GMP_NUMB_BITS > pos) && ((a->data[pos/GMP_NUMB_BITS] >> (pos % GMP_NUMB_BITS)) & 1);
-}
-
-void static secp256k1_num_inc(secp256k1_num_t *r) {
-    mp_limb_t ret = mpn_add_1(r->data, r->data, r->limbs, (mp_limb_t)1);
-    if (ret) {
-        assert(r->limbs < 2*NUM_LIMBS);
-        r->data[r->limbs++] = ret;
-    }
-}
-
-void static secp256k1_num_set_hex(secp256k1_num_t *r, const char *a, int alen) {
-    static const unsigned char cvt[256] = {
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 1, 2, 3, 4, 5, 6,7,8,9,0,0,0,0,0,0,
-        0,10,11,12,13,14,15,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0,10,11,12,13,14,15,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0
-    };
-    unsigned char num[257] = {};
-    for (int i=0; i<alen; i++) {
-        num[i] = cvt[a[i]];
-    }
-    r->limbs = mpn_set_str(r->data, num, alen, 16);
-    while (r->limbs > 1 && r->data[r->limbs-1] == 0) r->limbs--;
-}
-
-void static secp256k1_num_get_hex(char *r, int rlen, const secp256k1_num_t *a) {
-    static const unsigned char cvt[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
-    unsigned char *tmp = malloc(257);
-    mp_size_t len = mpn_get_str(tmp, 16, (mp_limb_t*)a->data, a->limbs);
-    assert(len <= rlen);
-    for (int i=0; i<len; i++) {
-        assert(rlen-len+i >= 0);
-        assert(rlen-len+i < rlen);
-        assert(tmp[i] >= 0);
-        assert(tmp[i] < 16);
-        r[rlen-len+i] = cvt[tmp[i]];
-    }
-    for (int i=0; i<rlen-len; i++) {
-        assert(i >= 0);
-        assert(i < rlen);
-        r[i] = cvt[0];
-    }
-    free(tmp);
-}
-
-void static secp256k1_num_split(secp256k1_num_t *rl, secp256k1_num_t *rh, const secp256k1_num_t *a, int bits) {
-    assert(bits > 0);
-    rh->neg = a->neg;
-    if (bits >= a->limbs * GMP_NUMB_BITS) {
-        *rl = *a;
-        rh->limbs = 1;
-        rh->data[0] = 0;
-        return;
-    }
-    rl->limbs = 0;
-    rl->neg = a->neg;
-    int left = bits;
-    while (left >= GMP_NUMB_BITS) {
-        rl->data[rl->limbs] = a->data[rl->limbs];
-        rl->limbs++;
-        left -= GMP_NUMB_BITS;
-    }
-    if (left == 0) {
-        mpn_copyi(rh->data, a->data + rl->limbs, a->limbs - rl->limbs);
-        rh->limbs = a->limbs - rl->limbs;
-    } else {
-        mpn_rshift(rh->data, a->data + rl->limbs, a->limbs - rl->limbs, left);
-        rh->limbs = a->limbs - rl->limbs;
-        while (rh->limbs>1 && rh->data[rh->limbs-1]==0) rh->limbs--;
-    }
-    if (left > 0) {
-        rl->data[rl->limbs] = a->data[rl->limbs] & ((((mp_limb_t)1) << left) - 1);
-        rl->limbs++;
-    }
-    while (rl->limbs>1 && rl->data[rl->limbs-1]==0) rl->limbs--;
-}
-
-void static secp256k1_num_negate(secp256k1_num_t *r) {
-    r->neg ^= 1;
-}
-
-#endif
diff --git a/secp256k1/impl/num_openssl.h b/secp256k1/impl/num_openssl.h
deleted file mode 100644
index 0a54689ac..000000000
--- a/secp256k1/impl/num_openssl.h
+++ /dev/null
@@ -1,145 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_NUM_REPR_IMPL_H_
-#define _SECP256K1_NUM_REPR_IMPL_H_
-
-#include <assert.h>
-#include <string.h>
-#include <stdlib.h>
-#include <openssl/bn.h>
-#include <openssl/crypto.h>
-
-#include "../num.h"
-
-void static secp256k1_num_init(secp256k1_num_t *r) {
-    BN_init(&r->bn);
-}
-
-void static secp256k1_num_free(secp256k1_num_t *r) {
-    BN_free(&r->bn);
-}
-
-void static secp256k1_num_copy(secp256k1_num_t *r, const secp256k1_num_t *a) {
-    BN_copy(&r->bn, &a->bn);
-}
-
-void static secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const secp256k1_num_t *a) {
-    unsigned int size = BN_num_bytes(&a->bn);
-    assert(size <= rlen);
-    memset(r,0,rlen);
-    BN_bn2bin(&a->bn, r + rlen - size);
-}
-
-void static secp256k1_num_set_bin(secp256k1_num_t *r, const unsigned char *a, unsigned int alen) {
-    BN_bin2bn(a, alen, &r->bn);
-}
-
-void static secp256k1_num_set_int(secp256k1_num_t *r, int a) {
-    BN_set_word(&r->bn, a < 0 ? -a : a);
-    BN_set_negative(&r->bn, a < 0);
-}
-
-void static secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *m) {
-    BN_CTX *ctx = BN_CTX_new();
-    BN_mod_inverse(&r->bn, &a->bn, &m->bn, ctx);
-    BN_CTX_free(ctx);
-}
-
-void static secp256k1_num_mod_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b, const secp256k1_num_t *m) {
-    BN_CTX *ctx = BN_CTX_new();
-    BN_mod_mul(&r->bn, &a->bn, &b->bn, &m->bn, ctx);
-    BN_CTX_free(ctx);
-}
-
-int static secp256k1_num_cmp(const secp256k1_num_t *a, const secp256k1_num_t *b) {
-    return BN_cmp(&a->bn, &b->bn);
-}
-
-void static secp256k1_num_add(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
-    BN_add(&r->bn, &a->bn, &b->bn);
-}
-
-void static secp256k1_num_sub(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
-    BN_sub(&r->bn, &a->bn, &b->bn);
-}
-
-void static secp256k1_num_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
-    BN_CTX *ctx = BN_CTX_new();
-    BN_mul(&r->bn, &a->bn, &b->bn, ctx);
-    BN_CTX_free(ctx);
-}
-
-void static secp256k1_num_div(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
-    BN_CTX *ctx = BN_CTX_new();
-    BN_div(&r->bn, NULL, &a->bn, &b->bn, ctx);
-    BN_CTX_free(ctx);
-}
-
-void static secp256k1_num_mod(secp256k1_num_t *r, const secp256k1_num_t *m) {
-    BN_CTX *ctx = BN_CTX_new();
-    BN_nnmod(&r->bn, &r->bn, &m->bn, ctx);
-    BN_CTX_free(ctx);
-}
-
-int static secp256k1_num_bits(const secp256k1_num_t *a) {
-    return BN_num_bits(&a->bn);
-}
-
-int static secp256k1_num_shift(secp256k1_num_t *r, int bits) {
-    int ret = BN_is_zero(&r->bn) ? 0 : r->bn.d[0] & ((1 << bits) - 1);
-    BN_rshift(&r->bn, &r->bn, bits);
-    return ret;
-}
-
-int static secp256k1_num_is_zero(const secp256k1_num_t *a) {
-    return BN_is_zero(&a->bn);
-}
-
-int static secp256k1_num_is_odd(const secp256k1_num_t *a) {
-    return BN_is_odd(&a->bn);
-}
-
-int static secp256k1_num_is_neg(const secp256k1_num_t *a) {
-    return BN_is_negative(&a->bn);
-}
-
-int static secp256k1_num_get_bit(const secp256k1_num_t *a, int pos) {
-    return BN_is_bit_set(&a->bn, pos);
-}
-
-void static secp256k1_num_inc(secp256k1_num_t *r) {
-    BN_add_word(&r->bn, 1);
-}
-
-void static secp256k1_num_set_hex(secp256k1_num_t *r, const char *a, int alen) {
-    char *str = (char*)malloc(alen+1);
-    memcpy(str, a, alen);
-    str[alen] = 0;
-    BIGNUM *pbn = &r->bn;
-    BN_hex2bn(&pbn, str);
-    free(str);
-}
-
-void static secp256k1_num_get_hex(char *r, int rlen, const secp256k1_num_t *a) {
-    char *str = BN_bn2hex(&a->bn);
-    int len = strlen(str);
-    assert(rlen >= len);
-    for (int i=0; i<rlen-len; i++)
-        r[i] = '0';
-    memcpy(r+rlen-len, str, len);
-    OPENSSL_free(str);
-}
-
-void static secp256k1_num_split(secp256k1_num_t *rl, secp256k1_num_t *rh, const secp256k1_num_t *a, int bits) {
-    BN_copy(&rl->bn, &a->bn);
-    BN_rshift(&rh->bn, &a->bn, bits);
-    BN_mask_bits(&rl->bn, bits);
-}
-
-void static secp256k1_num_negate(secp256k1_num_t *r) {
-    BN_set_negative(&r->bn, !BN_is_negative(&r->bn));
-}
-
-#endif
diff --git a/secp256k1/impl/util.h b/secp256k1/impl/util.h
deleted file mode 100644
index a59a00cac..000000000
--- a/secp256k1/impl/util.h
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_UTIL_IMPL_H_
-#define _SECP256K1_UTIL_IMPL_H_
-
-#include <stdint.h>
-#include <string.h>
-
-#include "../util.h"
-
-static inline uint32_t secp256k1_rand32(void) {
-    static uint32_t Rz = 11, Rw = 11;
-    Rz = 36969 * (Rz & 0xFFFF) + (Rz >> 16);
-    Rw = 18000 * (Rw & 0xFFFF) + (Rw >> 16);
-    return (Rw << 16) + (Rw >> 16) + Rz;
-}
-
-static void secp256k1_rand256(unsigned char *b32) {
-    for (int i=0; i<8; i++) {
-        uint32_t r = secp256k1_rand32();
-        b32[i*4 + 0] = (r >>  0) & 0xFF;
-        b32[i*4 + 1] = (r >>  8) & 0xFF;
-        b32[i*4 + 2] = (r >> 16) & 0xFF;
-        b32[i*4 + 3] = (r >> 24) & 0xFF;
-    }
-}
-
-static void secp256k1_rand256_test(unsigned char *b32) {
-    int bits=0;
-    memset(b32, 0, 32);
-    while (bits < 256) {
-        uint32_t ent = secp256k1_rand32();
-        int now = 1 + ((ent % 64)*((ent >> 6) % 32)+16)/31;
-        uint32_t val = 1 & (ent >> 11);
-        while (now > 0 && bits < 256) {
-            b32[bits / 8] |= val << (bits % 8);
-            now--;
-            bits++;
-        }
-    }
-}
-
-#endif
diff --git a/secp256k1/include/secp256k1.h b/secp256k1/include/secp256k1.h
new file mode 100644
index 000000000..06afd4c65
--- /dev/null
+++ b/secp256k1/include/secp256k1.h
@@ -0,0 +1,347 @@
+#ifndef _SECP256K1_
+# define _SECP256K1_
+
+# ifdef __cplusplus
+extern "C" {
+# endif
+
+# if !defined(SECP256K1_GNUC_PREREQ)
+#  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+#   define SECP256K1_GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+#  else
+#   define SECP256K1_GNUC_PREREQ(_maj,_min) 0
+#  endif
+# endif
+
+# if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
+#  if SECP256K1_GNUC_PREREQ(2,7)
+#   define SECP256K1_INLINE __inline__
+#  elif (defined(_MSC_VER))
+#   define SECP256K1_INLINE __inline
+#  else
+#   define SECP256K1_INLINE
+#  endif
+# else
+#  define SECP256K1_INLINE inline
+# endif
+
+/**Warning attributes
+  * NONNULL is not used if SECP256K1_BUILD is set to avoid the compiler optimizing out
+  * some paranoid null checks. */
+# if defined(__GNUC__) && SECP256K1_GNUC_PREREQ(3, 4)
+#  define SECP256K1_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__))
+# else
+#  define SECP256K1_WARN_UNUSED_RESULT
+# endif
+# if !defined(SECP256K1_BUILD) && defined(__GNUC__) && SECP256K1_GNUC_PREREQ(3, 4)
+#  define SECP256K1_ARG_NONNULL(_x)  __attribute__ ((__nonnull__(_x)))
+# else
+#  define SECP256K1_ARG_NONNULL(_x)
+# endif
+
+/** Opaque data structure that holds context information (precomputed tables etc.).
+ *  Only functions that take a pointer to a non-const context require exclusive
+ *  access to it. Multiple functions that take a pointer to a const context may
+ *  run simultaneously.
+ */
+typedef struct secp256k1_context_struct secp256k1_context_t;
+
+/** Flags to pass to secp256k1_context_create. */
+# define SECP256K1_CONTEXT_VERIFY (1 << 0)
+# define SECP256K1_CONTEXT_SIGN   (1 << 1)
+
+/** Create a secp256k1 context object.
+ *  Returns: a newly created context object.
+ *  In:      flags: which parts of the context to initialize.
+ */
+secp256k1_context_t* secp256k1_context_create(
+  int flags
+) SECP256K1_WARN_UNUSED_RESULT;
+
+/** Copies a secp256k1 context object.
+ *  Returns: a newly created context object.
+ *  In:      ctx: an existing context to copy
+ */
+secp256k1_context_t* secp256k1_context_clone(
+  const secp256k1_context_t* ctx
+) SECP256K1_WARN_UNUSED_RESULT;
+
+/** Destroy a secp256k1 context object.
+ *  The context pointer may not be used afterwards.
+ */
+void secp256k1_context_destroy(
+  secp256k1_context_t* ctx
+) SECP256K1_ARG_NONNULL(1);
+
+/** Verify an ECDSA signature.
+ *  Returns: 1: correct signature
+ *           0: incorrect signature
+ *          -1: invalid public key
+ *          -2: invalid signature
+ * In:       ctx:       a secp256k1 context object, initialized for verification.
+ *           msg32:     the 32-byte message hash being verified (cannot be NULL)
+ *           sig:       the signature being verified (cannot be NULL)
+ *           siglen:    the length of the signature
+ *           pubkey:    the public key to verify with (cannot be NULL)
+ *           pubkeylen: the length of pubkey
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_verify(
+  const secp256k1_context_t* ctx,
+  const unsigned char *msg32,
+  const unsigned char *sig,
+  int siglen,
+  const unsigned char *pubkey,
+  int pubkeylen
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(5);
+
+/** A pointer to a function to deterministically generate a nonce.
+ * Returns: 1 if a nonce was successfully generated. 0 will cause signing to fail.
+ * In:      msg32:     the 32-byte message hash being signed (will not be NULL)
+ *          key32:     pointer to a 32-byte secret key (will not be NULL)
+ *          attempt:   how many iterations we have tried to find a nonce.
+ *                     This will almost always be 0, but different attempt values
+ *                     are required to result in a different nonce.
+ *          data:      Arbitrary data pointer that is passed through.
+ * Out:     nonce32:   pointer to a 32-byte array to be filled by the function.
+ * Except for test cases, this function should compute some cryptographic hash of
+ * the message, the key and the attempt.
+ */
+typedef int (*secp256k1_nonce_function_t)(
+  unsigned char *nonce32,
+  const unsigned char *msg32,
+  const unsigned char *key32,
+  unsigned int attempt,
+  const void *data
+);
+
+/** An implementation of RFC6979 (using HMAC-SHA256) as nonce generation function.
+ * If a data pointer is passed, it is assumed to be a pointer to 32 bytes of
+ * extra entropy.
+ */
+extern const secp256k1_nonce_function_t secp256k1_nonce_function_rfc6979;
+
+/** A default safe nonce generation function (currently equal to secp256k1_nonce_function_rfc6979). */
+extern const secp256k1_nonce_function_t secp256k1_nonce_function_default;
+
+
+/** Create an ECDSA signature.
+ *  Returns: 1: signature created
+ *           0: the nonce generation function failed, the private key was invalid, or there is not
+ *              enough space in the signature (as indicated by siglen).
+ *  In:      ctx:    pointer to a context object, initialized for signing (cannot be NULL)
+ *           msg32:  the 32-byte message hash being signed (cannot be NULL)
+ *           seckey: pointer to a 32-byte secret key (cannot be NULL)
+ *           noncefp:pointer to a nonce generation function. If NULL, secp256k1_nonce_function_default is used
+ *           ndata:  pointer to arbitrary data used by the nonce generation function (can be NULL)
+ *  Out:     sig:    pointer to an array where the signature will be placed (cannot be NULL)
+ *  In/Out:  siglen: pointer to an int with the length of sig, which will be updated
+ *                   to contain the actual signature length (<=72).
+ *
+ * The sig always has an s value in the lower half of the range (from 0x1
+ * to 0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF5D576E7357A4501DDFE92F46681B20A0,
+ * inclusive), unlike many other implementations.
+ * With ECDSA a third-party can forge a second distinct signature
+ * of the same message given a single initial signature without knowing
+ * the key by setting s to its additive inverse mod-order, 'flipping' the
+ * sign of the random point R which is not included in the signature.
+ * Since the forgery is of the same message this isn't universally
+ * problematic, but in systems where message malleability or uniqueness
+ * of signatures is important this can cause issues.  This forgery can be
+ * blocked by all verifiers forcing signers to use a canonical form. The
+ * lower-S form reduces the size of signatures slightly on average when
+ * variable length encodings (such as DER) are used and is cheap to
+ * verify, making it a good choice. Security of always using lower-S is
+ * assured because anyone can trivially modify a signature after the
+ * fact to enforce this property.  Adjusting it inside the signing
+ * function avoids the need to re-serialize or have curve specific
+ * constants outside of the library.  By always using a canonical form
+ * even in applications where it isn't needed it becomes possible to
+ * impose a requirement later if a need is discovered.
+ * No other forms of ECDSA malleability are known and none seem likely,
+ * but there is no formal proof that ECDSA, even with this additional
+ * restriction, is free of other malleability.  Commonly used serialization
+ * schemes will also accept various non-unique encodings, so care should
+ * be taken when this property is required for an application.
+ */
+int secp256k1_ecdsa_sign(
+  const secp256k1_context_t* ctx,
+  const unsigned char *msg32,
+  unsigned char *sig,
+  int *siglen,
+  const unsigned char *seckey,
+  secp256k1_nonce_function_t noncefp,
+  const void *ndata
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5);
+
+/** Create a compact ECDSA signature (64 byte + recovery id).
+ *  Returns: 1: signature created
+ *           0: the nonce generation function failed, or the secret key was invalid.
+ *  In:      ctx:    pointer to a context object, initialized for signing (cannot be NULL)
+ *           msg32:  the 32-byte message hash being signed (cannot be NULL)
+ *           seckey: pointer to a 32-byte secret key (cannot be NULL)
+ *           noncefp:pointer to a nonce generation function. If NULL, secp256k1_nonce_function_default is used
+ *           ndata:  pointer to arbitrary data used by the nonce generation function (can be NULL)
+ *  Out:     sig64:  pointer to a 64-byte array where the signature will be placed (cannot be NULL)
+ *                   In case 0 is returned, the returned signature length will be zero.
+ *           recid:  pointer to an int, which will be updated to contain the recovery id (can be NULL)
+ */
+int secp256k1_ecdsa_sign_compact(
+  const secp256k1_context_t* ctx,
+  const unsigned char *msg32,
+  unsigned char *sig64,
+  const unsigned char *seckey,
+  secp256k1_nonce_function_t noncefp,
+  const void *ndata,
+  int *recid
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+/** Recover an ECDSA public key from a compact signature.
+ *  Returns: 1: public key successfully recovered (which guarantees a correct signature).
+ *           0: otherwise.
+ *  In:      ctx:        pointer to a context object, initialized for verification (cannot be NULL)
+ *           msg32:      the 32-byte message hash assumed to be signed (cannot be NULL)
+ *           sig64:      signature as 64 byte array (cannot be NULL)
+ *           compressed: whether to recover a compressed or uncompressed pubkey
+ *           recid:      the recovery id (0-3, as returned by ecdsa_sign_compact)
+ *  Out:     pubkey:     pointer to a 33 or 65 byte array to put the pubkey (cannot be NULL)
+ *           pubkeylen:  pointer to an int that will contain the pubkey length (cannot be NULL)
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_recover_compact(
+  const secp256k1_context_t* ctx,
+  const unsigned char *msg32,
+  const unsigned char *sig64,
+  unsigned char *pubkey,
+  int *pubkeylen,
+  int compressed,
+  int recid
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5);
+
+/** Verify an ECDSA secret key.
+ *  Returns: 1: secret key is valid
+ *           0: secret key is invalid
+ *  In:      ctx: pointer to a context object (cannot be NULL)
+ *           seckey: pointer to a 32-byte secret key (cannot be NULL)
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_seckey_verify(
+  const secp256k1_context_t* ctx,
+  const unsigned char *seckey
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2);
+
+/** Just validate a public key.
+ *  Returns: 1: public key is valid
+ *           0: public key is invalid
+ *  In:      ctx:       pointer to a context object (cannot be NULL)
+ *           pubkey:    pointer to a 33-byte or 65-byte public key (cannot be NULL).
+ *           pubkeylen: length of pubkey
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_verify(
+  const secp256k1_context_t* ctx,
+  const unsigned char *pubkey,
+  int pubkeylen
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2);
+
+/** Compute the public key for a secret key.
+ *  In:     ctx:        pointer to a context object, initialized for signing (cannot be NULL)
+ *          compressed: whether the computed public key should be compressed
+ *          seckey:     pointer to a 32-byte private key (cannot be NULL)
+ *  Out:    pubkey:     pointer to a 33-byte (if compressed) or 65-byte (if uncompressed)
+ *                      area to store the public key (cannot be NULL)
+ *          pubkeylen:  pointer to int that will be updated to contain the pubkey's
+ *                      length (cannot be NULL)
+ *  Returns: 1: secret was valid, public key stored
+ *           0: secret was invalid, try again
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_create(
+  const secp256k1_context_t* ctx,
+  unsigned char *pubkey,
+  int *pubkeylen,
+  const unsigned char *seckey,
+  int compressed
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+/** Decompress a public key.
+ * In:     ctx:       pointer to a context object (cannot be NULL)
+ * In/Out: pubkey:    pointer to a 65-byte array to put the decompressed public key.
+ *                    It must contain a 33-byte or 65-byte public key already (cannot be NULL)
+ *         pubkeylen: pointer to the size of the public key pointed to by pubkey (cannot be NULL)
+ *                    It will be updated to reflect the new size.
+ * Returns: 0: pubkey was invalid
+ *          1: pubkey was valid, and was replaced with its decompressed version
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_decompress(
+  const secp256k1_context_t* ctx,
+  unsigned char *pubkey,
+  int *pubkeylen
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Export a private key in DER format.
+ * In: ctx: pointer to a context object, initialized for signing (cannot be NULL)
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_export(
+  const secp256k1_context_t* ctx,
+  const unsigned char *seckey,
+  unsigned char *privkey,
+  int *privkeylen,
+  int compressed
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+/** Import a private key in DER format. */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_import(
+  const secp256k1_context_t* ctx,
+  unsigned char *seckey,
+  const unsigned char *privkey,
+  int privkeylen
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Tweak a private key by adding tweak to it. */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_tweak_add(
+  const secp256k1_context_t* ctx,
+  unsigned char *seckey,
+  const unsigned char *tweak
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Tweak a public key by adding tweak times the generator to it.
+ * In: ctx: pointer to a context object, initialized for verification (cannot be NULL)
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_tweak_add(
+  const secp256k1_context_t* ctx,
+  unsigned char *pubkey,
+  int pubkeylen,
+  const unsigned char *tweak
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(4);
+
+/** Tweak a private key by multiplying it with tweak. */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_tweak_mul(
+  const secp256k1_context_t* ctx,
+  unsigned char *seckey,
+  const unsigned char *tweak
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Tweak a public key by multiplying it with tweak.
+ * In: ctx: pointer to a context object, initialized for verification (cannot be NULL)
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_tweak_mul(
+  const secp256k1_context_t* ctx,
+  unsigned char *pubkey,
+  int pubkeylen,
+  const unsigned char *tweak
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(4);
+
+/** Updates the context randomization.
+ *  Returns: 1: randomization successfully updated
+ *           0: error
+ *  In:      ctx:       pointer to a context object (cannot be NULL)
+ *           seed32:    pointer to a 32-byte random seed (NULL resets to initial state)
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_context_randomize(
+  secp256k1_context_t* ctx,
+  const unsigned char *seed32
+) SECP256K1_ARG_NONNULL(1);
+
+
+# ifdef __cplusplus
+}
+# endif
+
+#endif
diff --git a/secp256k1/libsecp256k1-config.h b/secp256k1/libsecp256k1-config.h
new file mode 100644
index 000000000..d23fee837
--- /dev/null
+++ b/secp256k1/libsecp256k1-config.h
@@ -0,0 +1,134 @@
+/* src/libsecp256k1-config.h.  Generated from libsecp256k1-config.h.in by configure.  */
+/* src/libsecp256k1-config.h.in.  Generated from configure.ac by autoheader.  */
+
+#ifndef LIBSECP256K1_CONFIG_H
+
+#define LIBSECP256K1_CONFIG_H
+
+/* Define if building universal (internal helper macro) */
+/* #undef AC_APPLE_UNIVERSAL_BUILD */
+
+/* Define this symbol if OpenSSL EC functions are available */
+/* #undef ENABLE_OPENSSL_TESTS */
+
+/* Define this symbol if __builtin_expect is available */
+#define HAVE_BUILTIN_EXPECT 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define this symbol if libcrypto is installed */
+/* #undef HAVE_LIBCRYPTO */
+
+/* Define this symbol if libgmp is installed */
+#define HAVE_LIBGMP 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if the system has the type `__int128'. */
+#define HAVE___INT128 1
+
+/* Define to the sub-directory where libtool stores uninstalled libraries. */
+#define LT_OBJDIR ".libs/"
+
+/* Name of package */
+#define PACKAGE "libsecp256k1"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT ""
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "libsecp256k1"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "libsecp256k1 0.1"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "libsecp256k1"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "0.1"
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Define this symbol to enable x86_64 assembly optimizations */
+/* #undef USE_ASM_X86_64 */
+
+/* Define this symbol to use endomorphism optimization */
+/* #undef USE_ENDOMORPHISM */
+
+/* Define this symbol to use the FIELD_10X26 implementation */
+/* #undef USE_FIELD_10X26 */
+
+/* Define this symbol to use the FIELD_5X52 implementation */
+#define USE_FIELD_5X52 1
+
+/* Define this symbol to use the native field inverse implementation */
+/* #undef USE_FIELD_INV_BUILTIN */
+
+/* Define this symbol to use the num-based field inverse implementation */
+#define USE_FIELD_INV_NUM 1
+
+/* Define this symbol to use the gmp implementation for num */
+#define USE_NUM_GMP 1
+
+/* Define this symbol to use no num implementation */
+/* #undef USE_NUM_NONE */
+
+/* Define this symbol to use the 4x64 scalar implementation */
+#define USE_SCALAR_4X64 1
+
+/* Define this symbol to use the 8x32 scalar implementation */
+/* #undef USE_SCALAR_8X32 */
+
+/* Define this symbol to use the native scalar inverse implementation */
+/* #undef USE_SCALAR_INV_BUILTIN */
+
+/* Define this symbol to use the num-based scalar inverse implementation */
+#define USE_SCALAR_INV_NUM 1
+
+/* Version number of package */
+#define VERSION "0.1"
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+/* #  undef WORDS_BIGENDIAN */
+# endif
+#endif
+
+#endif /*LIBSECP256K1_CONFIG_H*/
diff --git a/secp256k1/num.h b/secp256k1/num.h
index 3fdd8f39d..339b6bb6e 100644
--- a/secp256k1/num.h
+++ b/secp256k1/num.h
@@ -1,95 +1,68 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
 
 #ifndef _SECP256K1_NUM_
 #define _SECP256K1_NUM_
 
+/* Pull in the build configuration first: it is what may define USE_NUM_NONE. */
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+#ifndef USE_NUM_NONE
+
 #if defined(USE_NUM_GMP)
 #include "num_gmp.h"
-#elif defined(USE_NUM_OPENSSL)
-#include "num_openssl.h"
-#elif defined(USE_NUM_BOOST)
-#include "num_boost.h"
 #else
 #error "Please select num implementation"
 #endif
 
-/** Initialize a number. */
-void static secp256k1_num_init(secp256k1_num_t *r);
-
-/** Free a number. */
-void static secp256k1_num_free(secp256k1_num_t *r);
-
 /** Copy a number. */
-void static secp256k1_num_copy(secp256k1_num_t *r, const secp256k1_num_t *a);
+static void secp256k1_num_copy(secp256k1_num_t *r, const secp256k1_num_t *a);
 
 /** Convert a number's absolute value to a binary big-endian string.
  *  There must be enough place. */
-void static secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const secp256k1_num_t *a);
+static void secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const secp256k1_num_t *a);
 
 /** Set a number to the value of a binary big-endian string. */
-void static secp256k1_num_set_bin(secp256k1_num_t *r, const unsigned char *a, unsigned int alen);
-
-/** Set a number equal to a (signed) integer. */
-void static secp256k1_num_set_int(secp256k1_num_t *r, int a);
+static void secp256k1_num_set_bin(secp256k1_num_t *r, const unsigned char *a, unsigned int alen);
 
 /** Compute a modular inverse. The input must be less than the modulus. */
-void static secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *m);
-
-/** Multiply two numbers modulo another. */
-void static secp256k1_num_mod_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b, const secp256k1_num_t *m);
+static void secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *m);
 
 /** Compare the absolute value of two numbers. */
-int  static secp256k1_num_cmp(const secp256k1_num_t *a, const secp256k1_num_t *b);
+static int secp256k1_num_cmp(const secp256k1_num_t *a, const secp256k1_num_t *b);
+
+/** Test whether two numbers are equal (including sign). */
+static int secp256k1_num_eq(const secp256k1_num_t *a, const secp256k1_num_t *b);
 
 /** Add two (signed) numbers. */
-void static secp256k1_num_add(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b);
+static void secp256k1_num_add(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b);
 
 /** Subtract two (signed) numbers. */
-void static secp256k1_num_sub(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b);
+static void secp256k1_num_sub(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b);
 
 /** Multiply two (signed) numbers. */
-void static secp256k1_num_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b);
-
-/** Divide two (signed) numbers. */
-void static secp256k1_num_div(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b);
+static void secp256k1_num_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b);
 
 /** Replace a number by its remainder modulo m. M's sign is ignored. The result is a number between 0 and m-1,
     even if r was negative. */
-void static secp256k1_num_mod(secp256k1_num_t *r, const secp256k1_num_t *m);
+static void secp256k1_num_mod(secp256k1_num_t *r, const secp256k1_num_t *m);
 
-/** Calculate the number of bits in (the absolute value of) a number. */
-int  static secp256k1_num_bits(const secp256k1_num_t *a);
-
-/** Right-shift the passed number by bits bits, and return those bits. */
-int  static secp256k1_num_shift(secp256k1_num_t *r, int bits);
+/** Right-shift the passed number by bits bits. */
+static void secp256k1_num_shift(secp256k1_num_t *r, int bits);
 
 /** Check whether a number is zero. */
-int  static secp256k1_num_is_zero(const secp256k1_num_t *a);
-
-/** Check whether a number is odd. */
-int  static secp256k1_num_is_odd(const secp256k1_num_t *a);
+static int secp256k1_num_is_zero(const secp256k1_num_t *a);
 
 /** Check whether a number is strictly negative. */
-int  static secp256k1_num_is_neg(const secp256k1_num_t *a);
-
-/** Check whether a particular bit is set in a number. */
-int  static secp256k1_num_get_bit(const secp256k1_num_t *a, int pos);
-
-/** Increase a number by 1. */
-void static secp256k1_num_inc(secp256k1_num_t *r);
-
-/** Set a number equal to the value of a hex string (unsigned). */
-void static secp256k1_num_set_hex(secp256k1_num_t *r, const char *a, int alen);
-
-/** Convert (the absolute value of) a number to a hexadecimal string. */
-void static secp256k1_num_get_hex(char *r, int rlen, const secp256k1_num_t *a);
-
-/** Split a number into a low and high part. */
-void static secp256k1_num_split(secp256k1_num_t *rl, secp256k1_num_t *rh, const secp256k1_num_t *a, int bits);
+static int secp256k1_num_is_neg(const secp256k1_num_t *a);
 
 /** Change a number's sign. */
-void static secp256k1_num_negate(secp256k1_num_t *r);
+static void secp256k1_num_negate(secp256k1_num_t *r);
+
+#endif
 
 #endif
diff --git a/secp256k1/num_boost.h b/secp256k1/num_boost.h
deleted file mode 100644
index 2c9e49aee..000000000
--- a/secp256k1/num_boost.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_NUM_REPR_
-#define _SECP256K1_NUM_REPR_
-
-#include <boost/multiprecision/cpp_int.hpp>
-
-typedef boost::multiprecision::number<boost::multiprecision::cpp_int_backend<512, 512, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>> secp256k1_num_t ;
-
-#endif
diff --git a/secp256k1/num_gmp.h b/secp256k1/num_gmp.h
index 960df8605..baa1f2bf2 100644
--- a/secp256k1/num_gmp.h
+++ b/secp256k1/num_gmp.h
@@ -1,6 +1,8 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
 
 #ifndef _SECP256K1_NUM_REPR_
 #define _SECP256K1_NUM_REPR_
diff --git a/secp256k1/num_gmp_impl.h b/secp256k1/num_gmp_impl.h
new file mode 100644
index 000000000..dbbc458d5
--- /dev/null
+++ b/secp256k1/num_gmp_impl.h
@@ -0,0 +1,260 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_NUM_REPR_IMPL_H_
+#define _SECP256K1_NUM_REPR_IMPL_H_
+
+#include <string.h>
+#include <stdlib.h>
+#include <gmp.h>
+
+#include "util.h"
+#include "num.h"
+
+#ifdef VERIFY
+static void secp256k1_num_sanity(const secp256k1_num_t *a) {
+    VERIFY_CHECK(a->limbs == 1 || (a->limbs > 1 && a->data[a->limbs-1] != 0));
+}
+#else
+#define secp256k1_num_sanity(a) do { } while(0)
+#endif
+
+static void secp256k1_num_copy(secp256k1_num_t *r, const secp256k1_num_t *a) {
+    *r = *a;
+}
+
+static void secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const secp256k1_num_t *a) {
+    unsigned char tmp[65];
+    int len = 0;
+    int shift = 0;
+    if (a->limbs>1 || a->data[0] != 0) {
+        len = mpn_get_str(tmp, 256, (mp_limb_t*)a->data, a->limbs);
+    }
+    while (shift < len && tmp[shift] == 0) shift++;
+    VERIFY_CHECK(len-shift <= (int)rlen);
+    memset(r, 0, rlen - len + shift);
+    if (len > shift) {
+        memcpy(r + rlen - len + shift, tmp + shift, len - shift);
+    }
+    memset(tmp, 0, sizeof(tmp));
+}
+
+static void secp256k1_num_set_bin(secp256k1_num_t *r, const unsigned char *a, unsigned int alen) {
+    int len;
+    VERIFY_CHECK(alen > 0);
+    VERIFY_CHECK(alen <= 64);
+    len = mpn_set_str(r->data, a, alen, 256);
+    if (len == 0) {
+        r->data[0] = 0;
+        len = 1;
+    }
+    VERIFY_CHECK(len <= NUM_LIMBS*2);
+    r->limbs = len;
+    r->neg = 0;
+    while (r->limbs > 1 && r->data[r->limbs-1]==0) {
+        r->limbs--;
+    }
+}
+
+/* r = |a| + |b|; the caller in this file (secp256k1_num_subadd) passes the operand with more limbs as a. */
+static void secp256k1_num_add_abs(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
+    const mp_limb_t carry = mpn_add(r->data, a->data, a->limbs, b->data, b->limbs);
+    r->limbs = a->limbs;
+    if (carry == 0) return; /* the sum fits in a->limbs limbs */
+    VERIFY_CHECK(r->limbs < 2*NUM_LIMBS);
+    r->data[r->limbs++] = carry; /* append the carry as a new top limb */
+}
+
+/* r = |a| - |b|; a borrow-free result (|a| >= |b|) is asserted, then r is normalized. */
+static void secp256k1_num_sub_abs(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
+    const mp_limb_t borrow = mpn_sub(r->data, a->data, a->limbs, b->data, b->limbs);
+    VERIFY_CHECK(borrow == 0);
+    /* Start from a's length and drop leading zero limbs, always keeping one. */
+    for (r->limbs = a->limbs; r->limbs > 1 && r->data[r->limbs-1] == 0; r->limbs--) {
+    }
+}
+
+static void secp256k1_num_mod(secp256k1_num_t *r, const secp256k1_num_t *m) {
+    secp256k1_num_sanity(r);
+    secp256k1_num_sanity(m);
+
+    if (r->limbs >= m->limbs) {
+        mp_limb_t t[2*NUM_LIMBS];
+        mpn_tdiv_qr(t, r->data, 0, r->data, r->limbs, m->data, m->limbs);
+        memset(t, 0, sizeof(t));
+        r->limbs = m->limbs;
+        while (r->limbs > 1 && r->data[r->limbs-1]==0) {
+            r->limbs--;
+        }
+    }
+
+    if (r->neg && (r->limbs > 1 || r->data[0] != 0)) {
+        secp256k1_num_sub_abs(r, m, r);
+        r->neg = 0;
+    }
+}
+
+static void secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *m) {
+    int i;
+    mp_limb_t g[NUM_LIMBS+1];
+    mp_limb_t u[NUM_LIMBS+1];
+    mp_limb_t v[NUM_LIMBS+1];
+    mp_size_t sn;
+    mp_size_t gn;
+    secp256k1_num_sanity(a);
+    secp256k1_num_sanity(m);
+
+    /** mpn_gcdext computes: (G,S) = gcdext(U,V), where
+     *  * G = gcd(U,V)
+     *  * G = U*S + V*T
+     *  * U has equal or more limbs than V, and V has no padding
+     *  If we set U to be (a padded version of) a, and V = m:
+     *    G = a*S + m*T
+     *    G = a*S mod m
+     *  Assuming G=1:
+     *    S = 1/a mod m
+     */
+    VERIFY_CHECK(m->limbs <= NUM_LIMBS);
+    VERIFY_CHECK(m->data[m->limbs-1] != 0);
+    for (i = 0; i < m->limbs; i++) {
+        u[i] = (i < a->limbs) ? a->data[i] : 0;
+        v[i] = m->data[i];
+    }
+    sn = NUM_LIMBS+1;
+    gn = mpn_gcdext(g, r->data, &sn, u, m->limbs, v, m->limbs);
+    VERIFY_CHECK(gn == 1);
+    VERIFY_CHECK(g[0] == 1);
+    r->neg = a->neg ^ m->neg;
+    if (sn < 0) {
+        mpn_sub(r->data, m->data, m->limbs, r->data, -sn);
+        r->limbs = m->limbs;
+        while (r->limbs > 1 && r->data[r->limbs-1]==0) {
+            r->limbs--;
+        }
+    } else {
+        r->limbs = sn;
+    }
+    memset(g, 0, sizeof(g));
+    memset(u, 0, sizeof(u));
+    memset(v, 0, sizeof(v));
+}
+
+static int secp256k1_num_is_zero(const secp256k1_num_t *a) {
+    return (a->limbs == 1 && a->data[0] == 0);
+}
+
+static int secp256k1_num_is_neg(const secp256k1_num_t *a) {
+    return (a->limbs > 1 || a->data[0] != 0) && a->neg;
+}
+
+/* Magnitude-only ordering (the neg flag is never read): the operand with more
+ * limbs wins; with equal limb counts the verdict of mpn_cmp is returned. */
+static int secp256k1_num_cmp(const secp256k1_num_t *a, const secp256k1_num_t *b) {
+    const int same_length = (a->limbs == b->limbs);
+    if (same_length) {
+        return mpn_cmp(a->data, b->data, a->limbs);
+    }
+    return (a->limbs > b->limbs) ? 1 : -1;
+}
+
+/* Equal iff the limb counts, the effective signs (the neg flag of a zero
+ * value is ignored) and the limb contents all match. */
+static int secp256k1_num_eq(const secp256k1_num_t *a, const secp256k1_num_t *b) {
+    int a_negative;
+    int b_negative;
+    if (a->limbs != b->limbs) {
+        return 0;
+    }
+    a_negative = a->neg && !secp256k1_num_is_zero(a);
+    b_negative = b->neg && !secp256k1_num_is_zero(b);
+    return a_negative == b_negative && mpn_cmp(a->data, b->data, a->limbs) == 0;
+}
+
+/* Shared core of secp256k1_num_add and secp256k1_num_sub on sign-magnitude
+ * numbers: bneg is XORed into b's sign, so bneg == 0 adds b and bneg == 1
+ * subtracts it. Equal effective signs add the magnitudes; otherwise the
+ * smaller magnitude is subtracted from the larger and r takes the sign of
+ * the larger (on a magnitude tie, b's effective sign is stored with the
+ * zero result). */
+static void secp256k1_num_subadd(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b, int bneg) {
+    const int b_sign = b->neg ^ bneg; /* sign of b after the optional flip */
+    if (b_sign ^ a->neg) {
+        const int a_larger = secp256k1_num_cmp(a, b) > 0;
+        r->neg = a_larger ? a->neg : b_sign;
+        secp256k1_num_sub_abs(r, a_larger ? a : b, a_larger ? b : a);
+    } else {
+        const int a_longer = a->limbs >= b->limbs;
+        r->neg = a->neg;
+        secp256k1_num_add_abs(r, a_longer ? a : b, a_longer ? b : a);
+    }
+}
+
+static void secp256k1_num_add(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
+    secp256k1_num_sanity(a);
+    secp256k1_num_sanity(b);
+    secp256k1_num_subadd(r, a, b, 0);
+}
+
+static void secp256k1_num_sub(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
+    secp256k1_num_sanity(a);
+    secp256k1_num_sanity(b);
+    secp256k1_num_subadd(r, a, b, 1);
+}
+
+static void secp256k1_num_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
+    mp_limb_t tmp[2*NUM_LIMBS+1];
+    secp256k1_num_sanity(a);
+    secp256k1_num_sanity(b);
+
+    VERIFY_CHECK(a->limbs + b->limbs <= 2*NUM_LIMBS+1);
+    if ((a->limbs==1 && a->data[0]==0) || (b->limbs==1 && b->data[0]==0)) {
+        r->limbs = 1;
+        r->neg = 0;
+        r->data[0] = 0;
+        return;
+    }
+    if (a->limbs >= b->limbs) {
+        mpn_mul(tmp, a->data, a->limbs, b->data, b->limbs);
+    } else {
+        mpn_mul(tmp, b->data, b->limbs, a->data, a->limbs);
+    }
+    r->limbs = a->limbs + b->limbs;
+    if (r->limbs > 1 && tmp[r->limbs - 1]==0) {
+        r->limbs--;
+    }
+    VERIFY_CHECK(r->limbs <= 2*NUM_LIMBS);
+    mpn_copyi(r->data, tmp, r->limbs);
+    r->neg = a->neg ^ b->neg;
+    memset(tmp, 0, sizeof(tmp));
+}
+
+static void secp256k1_num_shift(secp256k1_num_t *r, int bits) {
+    int i;
+    if (bits % GMP_NUMB_BITS) {
+        /* Shift within limbs. */
+        mpn_rshift(r->data, r->data, r->limbs, bits % GMP_NUMB_BITS);
+    }
+    if (bits >= GMP_NUMB_BITS) {
+        /* Shift full limbs. */
+        for (i = 0; i < r->limbs; i++) {
+            int index = i + (bits / GMP_NUMB_BITS);
+            if (index < r->limbs && index < 2*NUM_LIMBS) {
+                r->data[i] = r->data[index];
+            } else {
+                r->data[i] = 0;
+            }
+        }
+    }
+    while (r->limbs>1 && r->data[r->limbs-1]==0) {
+        r->limbs--;
+    }
+}
+
+static void secp256k1_num_negate(secp256k1_num_t *r) {
+    r->neg ^= 1;
+}
+
+#endif
diff --git a/secp256k1/num_impl.h b/secp256k1/num_impl.h
new file mode 100644
index 000000000..0b0e3a072
--- /dev/null
+++ b/secp256k1/num_impl.h
@@ -0,0 +1,24 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_NUM_IMPL_H_
+#define _SECP256K1_NUM_IMPL_H_
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#include "num.h"
+
+#if defined(USE_NUM_GMP)
+#include "num_gmp_impl.h"
+#elif defined(USE_NUM_NONE)
+/* Nothing. */
+#else
+#error "Please select num implementation"
+#endif
+
+#endif
diff --git a/secp256k1/num_openssl.h b/secp256k1/num_openssl.h
deleted file mode 100644
index 7d03757f6..000000000
--- a/secp256k1/num_openssl.h
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_NUM_REPR_
-#define _SECP256K1_NUM_REPR_
-
-#include <openssl/bn.h>
-
-typedef struct {
-    BIGNUM bn;
-} secp256k1_num_t;
-
-#endif
diff --git a/secp256k1/scalar.h b/secp256k1/scalar.h
new file mode 100644
index 000000000..f5d09f8d4
--- /dev/null
+++ b/secp256k1/scalar.h
@@ -0,0 +1,93 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_SCALAR_
+#define _SECP256K1_SCALAR_
+
+/* Configuration first: num.h tests USE_NUM_NONE/USE_NUM_GMP as soon as it is read. */
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+#include "num.h"
+
+#if defined(USE_SCALAR_4X64)
+#include "scalar_4x64.h"
+#elif defined(USE_SCALAR_8X32)
+#include "scalar_8x32.h"
+#else
+#error "Please select scalar implementation"
+#endif
+
+/** Clear a scalar to prevent the leak of sensitive data. */
+static void secp256k1_scalar_clear(secp256k1_scalar_t *r);
+
+/** Access bits from a scalar. All requested bits must belong to the same 32-bit limb. */
+static unsigned int secp256k1_scalar_get_bits(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count);
+
+/** Access bits from a scalar. Not constant time. */
+static unsigned int secp256k1_scalar_get_bits_var(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count);
+
+/** Set a scalar from a big endian byte array. */
+static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *bin, int *overflow);
+
+/** Set a scalar to an unsigned integer. */
+static void secp256k1_scalar_set_int(secp256k1_scalar_t *r, unsigned int v);
+
+/** Convert a scalar to a byte array. */
+static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_t* a);
+
+/** Add two scalars together (modulo the group order). Returns whether it overflowed. */
+static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b);
+
+/** Add a power of two to a scalar. The result is not allowed to overflow. */
+static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit);
+
+/** Multiply two scalars (modulo the group order). */
+static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b);
+
+/** Compute the square of a scalar (modulo the group order). */
+static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a);
+
+/** Compute the inverse of a scalar (modulo the group order). */
+static void secp256k1_scalar_inverse(secp256k1_scalar_t *r, const secp256k1_scalar_t *a);
+
+/** Compute the inverse of a scalar (modulo the group order), without constant-time guarantee. */
+static void secp256k1_scalar_inverse_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *a);
+
+/** Compute the complement of a scalar (modulo the group order). */
+static void secp256k1_scalar_negate(secp256k1_scalar_t *r, const secp256k1_scalar_t *a);
+
+/** Check whether a scalar equals zero. */
+static int secp256k1_scalar_is_zero(const secp256k1_scalar_t *a);
+
+/** Check whether a scalar equals one. */
+static int secp256k1_scalar_is_one(const secp256k1_scalar_t *a);
+
+/** Check whether a scalar is higher than the group order divided by 2. */
+static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a);
+
+#ifndef USE_NUM_NONE
+/** Convert a scalar to a number. */
+static void secp256k1_scalar_get_num(secp256k1_num_t *r, const secp256k1_scalar_t *a);
+
+/** Get the order of the group as a number. */
+static void secp256k1_scalar_order_get_num(secp256k1_num_t *r);
+#endif
+
+/** Compare two scalars. */
+static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scalar_t *b);
+
+#ifdef USE_ENDOMORPHISM
+/** Find r1 and r2 such that r1+r2*2^128 = a. */
+static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a);
+/** Find r1 and r2 such that r1+r2*lambda = a, and r1 and r2 are maximum 128 bits long (see secp256k1_gej_mul_lambda). */
+static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a);
+#endif
+
+/** Multiply a and b (without taking the modulus!), divide by 2**shift, and round to the nearest integer. Shift must be at least 256. */
+static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b, unsigned int shift);
+
+#endif
diff --git a/secp256k1/scalar_4x64.h b/secp256k1/scalar_4x64.h
new file mode 100644
index 000000000..82899aa7b
--- /dev/null
+++ b/secp256k1/scalar_4x64.h
@@ -0,0 +1,19 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_SCALAR_REPR_
+#define _SECP256K1_SCALAR_REPR_
+
+#include <stdint.h>
+
+/** A scalar modulo the group order of the secp256k1 curve, held as four 64-bit limbs. */
+typedef struct {
+    uint64_t d[4]; /* d[0] is the least significant limb, d[3] the most significant */
+} secp256k1_scalar_t;
+/** Brace initializer for a scalar from eight 32-bit words, d7 (most significant) down to d0; adjacent words are packed in pairs into the 64-bit limbs. */
+#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{((uint64_t)(d1)) << 32 | (d0), ((uint64_t)(d3)) << 32 | (d2), ((uint64_t)(d5)) << 32 | (d4), ((uint64_t)(d7)) << 32 | (d6)}}
+
+#endif
diff --git a/secp256k1/scalar_4x64_impl.h b/secp256k1/scalar_4x64_impl.h
new file mode 100644
index 000000000..ff365292f
--- /dev/null
+++ b/secp256k1/scalar_4x64_impl.h
@@ -0,0 +1,920 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_SCALAR_REPR_IMPL_H_
+#define _SECP256K1_SCALAR_REPR_IMPL_H_
+
+/* Limbs of the secp256k1 group order, least significant 64-bit limb first (SECP256K1_N_0 holds bits 0..63). */
+#define SECP256K1_N_0 ((uint64_t)0xBFD25E8CD0364141ULL)
+#define SECP256K1_N_1 ((uint64_t)0xBAAEDCE6AF48A03BULL)
+#define SECP256K1_N_2 ((uint64_t)0xFFFFFFFFFFFFFFFEULL)
+#define SECP256K1_N_3 ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+
+/* Limbs of 2^256 minus the secp256k1 order (its two's complement). The fourth limb would be ~SECP256K1_N_3 == 0, so it has no macro. */
+#define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1)
+#define SECP256K1_N_C_1 (~SECP256K1_N_1)
+#define SECP256K1_N_C_2 (1)
+
+/* Limbs of half the secp256k1 order, rounded down, least significant limb first. */
+#define SECP256K1_N_H_0 ((uint64_t)0xDFE92F46681B20A0ULL)
+#define SECP256K1_N_H_1 ((uint64_t)0x5D576E7357A4501DULL)
+#define SECP256K1_N_H_2 ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#define SECP256K1_N_H_3 ((uint64_t)0x7FFFFFFFFFFFFFFFULL)
+
+SECP256K1_INLINE static void secp256k1_scalar_clear(secp256k1_scalar_t *r) {
+    /* Overwrite each of the four limbs of r with zero. */
+    uint64_t *limb = r->d;
+    limb[0] = 0; limb[1] = 0;
+    limb[2] = 0; limb[3] = 0;
+}
+
+SECP256K1_INLINE static void secp256k1_scalar_set_int(secp256k1_scalar_t *r, unsigned int v) {
+    /* v goes into the lowest limb; the three remaining limbs are zeroed. */
+    uint64_t *limb = r->d;
+    limb[0] = v;
+    limb[1] = limb[2] = limb[3] = 0;
+}
+
+SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count) { /* Return count bits of a, starting at bit position offset (bit 0 is the lowest bit of d[0]). */
+    VERIFY_CHECK((offset + count - 1) >> 6 == offset >> 6); /* all requested bits must lie within a single 64-bit limb */
+    return (a->d[offset >> 6] >> (offset & 0x3F)) & ((((uint64_t)1) << count) - 1); /* select the limb, shift the field down to bit 0, mask to count bits */
+}
+
+SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits_var(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count) { /* Like secp256k1_scalar_get_bits, but the field may straddle two limbs; branches on offset and count. */
+    VERIFY_CHECK(count < 32);
+    VERIFY_CHECK(offset + count <= 256);
+    if ((offset + count - 1) >> 6 == offset >> 6) { /* first and last requested bit are in the same limb */
+        return secp256k1_scalar_get_bits(a, offset, count);
+    } else { /* field crosses a limb boundary: join the top of one limb with the bottom of the next */
+        VERIFY_CHECK((offset >> 6) + 1 < 4);
+        return ((a->d[offset >> 6] >> (offset & 0x3F)) | (a->d[(offset >> 6) + 1] << (64 - (offset & 0x3F)))) & ((((uint64_t)1) << count) - 1);
+    }
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar_t *a) { /* Return 1 if a >= the group order, 0 otherwise; limbs are compared from most to least significant without branching. */
+    int yes = 0; /* becomes 1 once a is known to be >= the order */
+    int no = 0; /* becomes 1 once a is known to be < the order */
+    no |= (a->d[3] < SECP256K1_N_3); /* No need for a > check: SECP256K1_N_3 is all ones. */
+    no |= (a->d[2] < SECP256K1_N_2);
+    yes |= (a->d[2] > SECP256K1_N_2) & ~no;
+    no |= (a->d[1] < SECP256K1_N_1);
+    yes |= (a->d[1] > SECP256K1_N_1) & ~no;
+    yes |= (a->d[0] >= SECP256K1_N_0) & ~no; /* >= on the last limb: a value equal to the order also counts as overflow */
+    return yes;
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, unsigned int overflow) { /* If overflow is 1, add 2^256 - order to r (equivalent to subtracting the order modulo 2^256); returns overflow unchanged. */
+    uint128_t t; /* running sum; after each limb is stored, the bits above 64 carry into the next limb */
+    VERIFY_CHECK(overflow <= 1);
+    t = (uint128_t)r->d[0] + overflow * SECP256K1_N_C_0;
+    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)r->d[1] + overflow * SECP256K1_N_C_1;
+    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)r->d[2] + overflow * SECP256K1_N_C_2;
+    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint64_t)r->d[3]; /* no SECP256K1_N_C_3 term exists, so only the carry is added to the top limb */
+    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL;
+    return overflow;
+}
+
+static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) { /* r = a + b reduced by at most one subtraction of the group order; returns 1 if that reduction happened, else 0. */
+    int overflow;
+    uint128_t t = (uint128_t)a->d[0] + b->d[0]; /* limb-wise addition; the high 64 bits of t carry into the next limb */
+    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)a->d[1] + b->d[1];
+    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)a->d[2] + b->d[2];
+    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)a->d[3] + b->d[3];
+    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    overflow = t + secp256k1_scalar_check_overflow(r); /* t is now the carry out of bit 255 */
+    VERIFY_CHECK(overflow == 0 || overflow == 1); /* presumably relies on a and b already being below the order - confirm with callers */
+    secp256k1_scalar_reduce(r, overflow);
+    return overflow;
+}
+
+static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) { /* Add 2^bit to r in place; no reduction is done, the absence of overflow is only asserted under VERIFY. */
+    uint128_t t;
+    VERIFY_CHECK(bit < 256);
+    t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F)); /* the addend is nonzero only for the limb that contains the bit */
+    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)r->d[1] + (((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F));
+    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)r->d[2] + (((uint64_t)((bit >> 6) == 2)) << (bit & 0x3F));
+    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)r->d[3] + (((uint64_t)((bit >> 6) == 3)) << (bit & 0x3F));
+    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL;
+#ifdef VERIFY
+    VERIFY_CHECK((t >> 64) == 0); /* no carry out of the top limb */
+    VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0); /* result is still below the group order */
+#endif
+}
+
+static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) { /* Load r from 32 bytes, most significant byte first, reducing once if the value is >= the order; *overflow (when non-NULL) receives 1 if reduced, else 0. */
+    int over;
+    r->d[0] = (uint64_t)b32[31] | (uint64_t)b32[30] << 8 | (uint64_t)b32[29] << 16 | (uint64_t)b32[28] << 24 | (uint64_t)b32[27] << 32 | (uint64_t)b32[26] << 40 | (uint64_t)b32[25] << 48 | (uint64_t)b32[24] << 56;
+    r->d[1] = (uint64_t)b32[23] | (uint64_t)b32[22] << 8 | (uint64_t)b32[21] << 16 | (uint64_t)b32[20] << 24 | (uint64_t)b32[19] << 32 | (uint64_t)b32[18] << 40 | (uint64_t)b32[17] << 48 | (uint64_t)b32[16] << 56;
+    r->d[2] = (uint64_t)b32[15] | (uint64_t)b32[14] << 8 | (uint64_t)b32[13] << 16 | (uint64_t)b32[12] << 24 | (uint64_t)b32[11] << 32 | (uint64_t)b32[10] << 40 | (uint64_t)b32[9] << 48 | (uint64_t)b32[8] << 56;
+    r->d[3] = (uint64_t)b32[7] | (uint64_t)b32[6] << 8 | (uint64_t)b32[5] << 16 | (uint64_t)b32[4] << 24 | (uint64_t)b32[3] << 32 | (uint64_t)b32[2] << 40 | (uint64_t)b32[1] << 48 | (uint64_t)b32[0] << 56;
+    over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r)); /* subtract the order once when the loaded value is >= the order */
+    if (overflow) { /* the out-parameter is optional */
+        *overflow = over;
+    }
+}
+
+static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_t* a) { /* Write a into bin[0..31], most significant byte first (d[3] first, d[0] last). */
+    bin[0] = a->d[3] >> 56; bin[1] = a->d[3] >> 48; bin[2] = a->d[3] >> 40; bin[3] = a->d[3] >> 32; bin[4] = a->d[3] >> 24; bin[5] = a->d[3] >> 16; bin[6] = a->d[3] >> 8; bin[7] = a->d[3];
+    bin[8] = a->d[2] >> 56; bin[9] = a->d[2] >> 48; bin[10] = a->d[2] >> 40; bin[11] = a->d[2] >> 32; bin[12] = a->d[2] >> 24; bin[13] = a->d[2] >> 16; bin[14] = a->d[2] >> 8; bin[15] = a->d[2];
+    bin[16] = a->d[1] >> 56; bin[17] = a->d[1] >> 48; bin[18] = a->d[1] >> 40; bin[19] = a->d[1] >> 32; bin[20] = a->d[1] >> 24; bin[21] = a->d[1] >> 16; bin[22] = a->d[1] >> 8; bin[23] = a->d[1];
+    bin[24] = a->d[0] >> 56; bin[25] = a->d[0] >> 48; bin[26] = a->d[0] >> 40; bin[27] = a->d[0] >> 32; bin[28] = a->d[0] >> 24; bin[29] = a->d[0] >> 16; bin[30] = a->d[0] >> 8; bin[31] = a->d[0];
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar_t *a) {
+    return !(a->d[0] | a->d[1] | a->d[2] | a->d[3]); /* the OR of all limbs is zero only when every limb is zero */
+}
+
+static void secp256k1_scalar_negate(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) { /* r = order - a, or 0 when a is 0; the zero case is handled with a mask rather than a branch. */
+    uint64_t nonzero = 0xFFFFFFFFFFFFFFFFULL * (secp256k1_scalar_is_zero(a) == 0); /* all-ones mask when a != 0, zero mask when a == 0 */
+    uint128_t t = (uint128_t)(~a->d[0]) + SECP256K1_N_0 + 1; /* ~a + 1 is the two's complement of a; adding the order limb by limb yields order - a */
+    r->d[0] = t & nonzero; t >>= 64;
+    t += (uint128_t)(~a->d[1]) + SECP256K1_N_1;
+    r->d[1] = t & nonzero; t >>= 64;
+    t += (uint128_t)(~a->d[2]) + SECP256K1_N_2;
+    r->d[2] = t & nonzero; t >>= 64;
+    t += (uint128_t)(~a->d[3]) + SECP256K1_N_3;
+    r->d[3] = t & nonzero;
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar_t *a) {
+    return !((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3]); /* flipping bit 0 of the low limb turns the value one into all-zero limbs */
+}
+
+static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) { /* Return 1 if a is greater than half the group order (rounded down), 0 otherwise; comparison is branch-free, most significant limb first. */
+    int yes = 0; /* becomes 1 once a is known to be above the half-order */
+    int no = 0; /* becomes 1 once a is known to be below the half-order */
+    no |= (a->d[3] < SECP256K1_N_H_3);
+    yes |= (a->d[3] > SECP256K1_N_H_3) & ~no;
+    no |= (a->d[2] < SECP256K1_N_H_2) & ~yes; /* No need for a > check: SECP256K1_N_H_2 is all ones. */
+    no |= (a->d[1] < SECP256K1_N_H_1) & ~yes;
+    yes |= (a->d[1] > SECP256K1_N_H_1) & ~no;
+    yes |= (a->d[0] > SECP256K1_N_H_0) & ~no; /* strict >: a value equal to the half-order is not high */
+    return yes;
+}
+
+/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */
+
+/** Add a*b to the number defined by (c0,c1,c2), c0 being the least significant word. c2 must never overflow. a and b are expanded without parentheses, so pass simple operands. */
+#define muladd(a,b) { \
+    uint64_t tl, th; \
+    { \
+        uint128_t t = (uint128_t)a * b; \
+        th = t >> 64;         /* at most 0xFFFFFFFFFFFFFFFE */ \
+        tl = t; \
+    } \
+    c0 += tl;                 /* overflow is handled on the next line */ \
+    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFFFFFFFFFF */ \
+    c1 += th;                 /* overflow is handled on the next line */ \
+    c2 += (c1 < th) ? 1 : 0;  /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
+}
+
+/** Add a*b to the number defined by (c0,c1), c0 being the least significant word. c1 must never overflow; c2 is not touched. */
+#define muladd_fast(a,b) { \
+    uint64_t tl, th; \
+    { \
+        uint128_t t = (uint128_t)a * b; \
+        th = t >> 64;         /* at most 0xFFFFFFFFFFFFFFFE */ \
+        tl = t; \
+    } \
+    c0 += tl;                 /* overflow is handled on the next line */ \
+    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFFFFFFFFFF */ \
+    c1 += th;                 /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK(c1 >= th); \
+}
+
+/** Add 2*a*b to the number defined by (c0,c1,c2), c0 being the least significant word. c2 must never overflow. The product is computed once and doubled word by word. */
+#define muladd2(a,b) { \
+    uint64_t tl, th, th2, tl2; \
+    { \
+        uint128_t t = (uint128_t)a * b; \
+        th = t >> 64;               /* at most 0xFFFFFFFFFFFFFFFE */ \
+        tl = t; \
+    } \
+    th2 = th + th;                  /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \
+    c2 += (th2 < th) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
+    VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
+    tl2 = tl + tl;                  /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \
+    th2 += (tl2 < tl) ? 1 : 0;      /* at most 0xFFFFFFFFFFFFFFFF */ \
+    c0 += tl2;                      /* overflow is handled on the next line */ \
+    th2 += (c0 < tl2) ? 1 : 0;      /* second overflow is handled on the next line */ \
+    c2 += (c0 < tl2) & (th2 == 0);  /* never overflows by contract (verified the next line) */ \
+    VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
+    c1 += th2;                      /* overflow is handled on the next line */ \
+    c2 += (c1 < th2) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
+    VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
+}
+
+/** Add the single word a to the number defined by (c0,c1,c2), c0 being the least significant word. c2 must never overflow. */
+#define sumadd(a) { \
+    unsigned int over; \
+    c0 += (a);                  /* overflow is handled on the next line */ \
+    over = (c0 < (a)) ? 1 : 0; \
+    c1 += over;                 /* overflow is handled on the next line */ \
+    c2 += (c1 < over) ? 1 : 0;  /* never overflows by contract */ \
+}
+
+/** Add the single word a to the number defined by (c0,c1), c0 being the least significant word. c1 must never overflow, c2 must be zero. */
+#define sumadd_fast(a) { \
+    c0 += (a);                 /* overflow is handled on the next line */ \
+    c1 += (c0 < (a)) ? 1 : 0;  /* never overflows by contract (verified the next line) */ \
+    VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \
+    VERIFY_CHECK(c2 == 0); \
+}
+
+/** Extract the lowest 64 bits of (c0,c1,c2) into n, and right shift the number by 64 bits (c1 moves into c0, c2 into c1, c2 becomes zero). */
+#define extract(n) { \
+    (n) = c0; \
+    c0 = c1; \
+    c1 = c2; \
+    c2 = 0; \
+}
+
+/** Extract the lowest 64 bits of (c0,c1,c2) into n, and right shift the number by 64 bits (c1 moves into c0, c1 becomes zero). c2 is required to be zero. */
+#define extract_fast(n) { \
+    (n) = c0; \
+    c0 = c1; \
+    c1 = 0; \
+    VERIFY_CHECK(c2 == 0); \
+}
+
+static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint64_t *l) { /* Reduce the 512-bit number l[0..7] (least significant limb first) modulo the group order into r, in three folding steps plus a final conditional subtraction. */
+#ifdef USE_ASM_X86_64
+    /* Reduce 512 bits into 385: m[0..6] = l[0..3] + l[4..7] * SECP256K1_N_C. */
+    uint64_t m0, m1, m2, m3, m4, m5, m6;
+    uint64_t p0, p1, p2, p3, p4;
+    uint64_t c;
+
+    __asm__ __volatile__(
+    /* Preload the high limbs l[4..7] into r11..r14 (called n0..n3 below). */
+    "movq 32(%%rsi), %%r11\n"
+    "movq 40(%%rsi), %%r12\n"
+    "movq 48(%%rsi), %%r13\n"
+    "movq 56(%%rsi), %%r14\n"
+    /* Initialize r8,r9,r10 */
+    "movq 0(%%rsi), %%r8\n"
+    "movq $0, %%r9\n"
+    "movq $0, %%r10\n"
+    /* (r8,r9) += n0 * c0 */
+    "movq %8, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    /* extract m0 */
+    "movq %%r8, %q0\n"
+    "movq $0, %%r8\n"
+    /* (r9,r10) += l1 */
+    "addq 8(%%rsi), %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r9,r10,r8) += n1 * c0 */
+    "movq %8, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += n0 * c1 */
+    "movq %9, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* extract m1 */
+    "movq %%r9, %q1\n"
+    "movq $0, %%r9\n"
+    /* (r10,r8,r9) += l2 */
+    "addq 16(%%rsi), %%r10\n"
+    "adcq $0, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += n2 * c0 */
+    "movq %8, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += n1 * c1 */
+    "movq %9, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += n0 */
+    "addq %%r11, %%r10\n"
+    "adcq $0, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* extract m2 */
+    "movq %%r10, %q2\n"
+    "movq $0, %%r10\n"
+    /* (r8,r9,r10) += l3 */
+    "addq 24(%%rsi), %%r8\n"
+    "adcq $0, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += n3 * c0 */
+    "movq %8, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += n2 * c1 */
+    "movq %9, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += n1 */
+    "addq %%r12, %%r8\n"
+    "adcq $0, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* extract m3 */
+    "movq %%r8, %q3\n"
+    "movq $0, %%r8\n"
+    /* (r9,r10,r8) += n3 * c1 */
+    "movq %9, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += n2 */
+    "addq %%r13, %%r9\n"
+    "adcq $0, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* extract m4 */
+    "movq %%r9, %q4\n"
+    /* (r10,r8) += n3 */
+    "addq %%r14, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* extract m5 */
+    "movq %%r10, %q5\n"
+    /* extract m6 */
+    "movq %%r8, %q6\n"
+    : "=g"(m0), "=g"(m1), "=g"(m2), "=g"(m3), "=g"(m4), "=g"(m5), "=g"(m6)
+    : "S"(l), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
+    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc");
+
+    /* Reduce 385 bits into 258: p[0..4] = m[0..3] + m[4..6] * SECP256K1_N_C. */
+    __asm__ __volatile__(
+    /* Preload m4, m5, m6 into r11, r12, r13 */
+    "movq %q9, %%r11\n"
+    "movq %q10, %%r12\n"
+    "movq %q11, %%r13\n"
+    /* Initialize (r8,r9,r10) with m0 */
+    "movq %q5, %%r8\n"
+    "movq $0, %%r9\n"
+    "movq $0, %%r10\n"
+    /* (r8,r9) += m4 * c0 */
+    "movq %12, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    /* extract p0 */
+    "movq %%r8, %q0\n"
+    "movq $0, %%r8\n"
+    /* (r9,r10) += m1 */
+    "addq %q6, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r9,r10,r8) += m5 * c0 */
+    "movq %12, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += m4 * c1 */
+    "movq %13, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* extract p1 */
+    "movq %%r9, %q1\n"
+    "movq $0, %%r9\n"
+    /* (r10,r8,r9) += m2 */
+    "addq %q7, %%r10\n"
+    "adcq $0, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += m6 * c0 */
+    "movq %12, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += m5 * c1 */
+    "movq %13, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += m4 */
+    "addq %%r11, %%r10\n"
+    "adcq $0, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* extract p2 */
+    "movq %%r10, %q2\n"
+    /* (r8,r9) += m3 */
+    "addq %q8, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r8,r9) += m6 * c1 */
+    "movq %13, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    /* (r8,r9) += m5 */
+    "addq %%r12, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* extract p3 */
+    "movq %%r8, %q3\n"
+    /* (r9) += m6 */
+    "addq %%r13, %%r9\n"
+    /* extract p4 */
+    "movq %%r9, %q4\n"
+    : "=&g"(p0), "=&g"(p1), "=&g"(p2), "=g"(p3), "=g"(p4)
+    : "g"(m0), "g"(m1), "g"(m2), "g"(m3), "g"(m4), "g"(m5), "g"(m6), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
+    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "cc");
+
+    /* Reduce 258 bits into 256: r[0..3] = p[0..3] + p[4] * SECP256K1_N_C, with the carry out left in c. */
+    __asm__ __volatile__(
+    /* Preload p4 into r10 */
+    "movq %q5, %%r10\n"
+    /* (rax,rdx) = p4 * c0 */
+    "movq %7, %%rax\n"
+    "mulq %%r10\n"
+    /* (rax,rdx) += p0 */
+    "addq %q1, %%rax\n"
+    "adcq $0, %%rdx\n"
+    /* extract r0 */
+    "movq %%rax, 0(%q6)\n"
+    /* Move to (r8,r9) */
+    "movq %%rdx, %%r8\n"
+    "movq $0, %%r9\n"
+    /* (r8,r9) += p1 */
+    "addq %q2, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r8,r9) += p4 * c1 */
+    "movq %8, %%rax\n"
+    "mulq %%r10\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    /* Extract r1 */
+    "movq %%r8, 8(%q6)\n"
+    "movq $0, %%r8\n"
+    /* (r9,r8) += p4 */
+    "addq %%r10, %%r9\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r8) += p2 */
+    "addq %q3, %%r9\n"
+    "adcq $0, %%r8\n"
+    /* Extract r2 */
+    "movq %%r9, 16(%q6)\n"
+    "movq $0, %%r9\n"
+    /* (r8,r9) += p3 */
+    "addq %q4, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* Extract r3 */
+    "movq %%r8, 24(%q6)\n"
+    /* Extract c */
+    "movq %%r9, %q0\n"
+    : "=g"(c)
+    : "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
+    : "rax", "rdx", "r8", "r9", "r10", "cc", "memory");
+#else
+    uint128_t c;
+    uint64_t c0, c1, c2;
+    uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7];
+    uint64_t m0, m1, m2, m3, m4, m5;
+    uint32_t m6;
+    uint64_t p0, p1, p2, p3;
+    uint32_t p4;
+
+    /* Reduce 512 bits into 385. */
+    /* m[0..6] = l[0..3] + n[0..3] * SECP256K1_N_C. */
+    c0 = l[0]; c1 = 0; c2 = 0;
+    muladd_fast(n0, SECP256K1_N_C_0);
+    extract_fast(m0);
+    sumadd_fast(l[1]);
+    muladd(n1, SECP256K1_N_C_0);
+    muladd(n0, SECP256K1_N_C_1);
+    extract(m1);
+    sumadd(l[2]);
+    muladd(n2, SECP256K1_N_C_0);
+    muladd(n1, SECP256K1_N_C_1);
+    sumadd(n0);
+    extract(m2);
+    sumadd(l[3]);
+    muladd(n3, SECP256K1_N_C_0);
+    muladd(n2, SECP256K1_N_C_1);
+    sumadd(n1);
+    extract(m3);
+    muladd(n3, SECP256K1_N_C_1);
+    sumadd(n2);
+    extract(m4);
+    sumadd_fast(n3);
+    extract_fast(m5);
+    VERIFY_CHECK(c0 <= 1);
+    m6 = c0;
+
+    /* Reduce 385 bits into 258. */
+    /* p[0..4] = m[0..3] + m[4..6] * SECP256K1_N_C. */
+    c0 = m0; c1 = 0; c2 = 0;
+    muladd_fast(m4, SECP256K1_N_C_0);
+    extract_fast(p0);
+    sumadd_fast(m1);
+    muladd(m5, SECP256K1_N_C_0);
+    muladd(m4, SECP256K1_N_C_1);
+    extract(p1);
+    sumadd(m2);
+    muladd(m6, SECP256K1_N_C_0);
+    muladd(m5, SECP256K1_N_C_1);
+    sumadd(m4);
+    extract(p2);
+    sumadd_fast(m3);
+    muladd_fast(m6, SECP256K1_N_C_1);
+    sumadd_fast(m5);
+    extract_fast(p3);
+    p4 = c0 + m6;
+    VERIFY_CHECK(p4 <= 2);
+
+    /* Reduce 258 bits into 256. */
+    /* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */
+    c = p0 + (uint128_t)SECP256K1_N_C_0 * p4;
+    r->d[0] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
+    c += p1 + (uint128_t)SECP256K1_N_C_1 * p4;
+    r->d[1] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
+    c += p2 + (uint128_t)p4;
+    r->d[2] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
+    c += p3;
+    r->d[3] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
+#endif
+
+    /* Final reduction of r: subtract the order once if the last step carried out (c) or r is still >= the order. */
+    secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
+}
+
+static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) { /* l[0..7] = a * b as a full 512-bit product, least significant limb first; no modular reduction is done here. */
+#ifdef USE_ASM_X86_64
+    const uint64_t *pb = b->d;
+    __asm__ __volatile__(
+    /* Preload a0..a2 into r15, rbx, rcx and b0..b3 into r11..r14 */
+    "movq 0(%%rdi), %%r15\n"
+    "movq 8(%%rdi), %%rbx\n"
+    "movq 16(%%rdi), %%rcx\n"
+    "movq 0(%%rdx), %%r11\n"
+    "movq 8(%%rdx), %%r12\n"
+    "movq 16(%%rdx), %%r13\n"
+    "movq 24(%%rdx), %%r14\n"
+    /* (rax,rdx) = a0 * b0 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r11\n"
+    /* Extract l0 */
+    "movq %%rax, 0(%%rsi)\n"
+    /* (r8,r9,r10) = (rdx) */
+    "movq %%rdx, %%r8\n"
+    "xorq %%r9, %%r9\n"
+    "xorq %%r10, %%r10\n"
+    /* (r8,r9,r10) += a0 * b1 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += a1 * b0 */
+    "movq %%rbx, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* Extract l1 */
+    "movq %%r8, 8(%%rsi)\n"
+    "xorq %%r8, %%r8\n"
+    /* (r9,r10,r8) += a0 * b2 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += a1 * b1 */
+    "movq %%rbx, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += a2 * b0 */
+    "movq %%rcx, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* Extract l2 */
+    "movq %%r9, 16(%%rsi)\n"
+    "xorq %%r9, %%r9\n"
+    /* (r10,r8,r9) += a0 * b3 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* Preload a3 */
+    "movq 24(%%rdi), %%r15\n"
+    /* (r10,r8,r9) += a1 * b2 */
+    "movq %%rbx, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += a2 * b1 */
+    "movq %%rcx, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += a3 * b0 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* Extract l3 */
+    "movq %%r10, 24(%%rsi)\n"
+    "xorq %%r10, %%r10\n"
+    /* (r8,r9,r10) += a1 * b3 */
+    "movq %%rbx, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += a2 * b2 */
+    "movq %%rcx, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += a3 * b1 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* Extract l4 */
+    "movq %%r8, 32(%%rsi)\n"
+    "xorq %%r8, %%r8\n"
+    /* (r9,r10,r8) += a2 * b3 */
+    "movq %%rcx, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += a3 * b2 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* Extract l5 */
+    "movq %%r9, 40(%%rsi)\n"
+    /* (r10,r8) += a3 * b3 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    /* Extract l6 */
+    "movq %%r10, 48(%%rsi)\n"
+    /* Extract l7 */
+    "movq %%r8, 56(%%rsi)\n"
+    : "+d"(pb)
+    : "S"(l), "D"(a->d)
+    : "rax", "rbx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "cc", "memory");
+#else
+    /* 160 bit accumulator. */
+    uint64_t c0 = 0, c1 = 0;
+    uint32_t c2 = 0;
+
+    /* l[0..7] = a[0..3] * b[0..3]. */
+    muladd_fast(a->d[0], b->d[0]);
+    extract_fast(l[0]);
+    muladd(a->d[0], b->d[1]);
+    muladd(a->d[1], b->d[0]);
+    extract(l[1]);
+    muladd(a->d[0], b->d[2]);
+    muladd(a->d[1], b->d[1]);
+    muladd(a->d[2], b->d[0]);
+    extract(l[2]);
+    muladd(a->d[0], b->d[3]);
+    muladd(a->d[1], b->d[2]);
+    muladd(a->d[2], b->d[1]);
+    muladd(a->d[3], b->d[0]);
+    extract(l[3]);
+    muladd(a->d[1], b->d[3]);
+    muladd(a->d[2], b->d[2]);
+    muladd(a->d[3], b->d[1]);
+    extract(l[4]);
+    muladd(a->d[2], b->d[3]);
+    muladd(a->d[3], b->d[2]);
+    extract(l[5]);
+    muladd_fast(a->d[3], b->d[3]);
+    extract_fast(l[6]);
+    VERIFY_CHECK(c1 == 0);
+    l[7] = c0;
+#endif
+}
+
+static void secp256k1_scalar_sqr_512(uint64_t l[8], const secp256k1_scalar_t *a) { /* l[0..7] = a * a as a full 512-bit product, least significant limb first; each cross product a_i*a_j (i != j) is computed once and added twice. */
+#ifdef USE_ASM_X86_64
+    __asm__ __volatile__(
+    /* Preload a0..a3 into r11..r14 */
+    "movq 0(%%rdi), %%r11\n"
+    "movq 8(%%rdi), %%r12\n"
+    "movq 16(%%rdi), %%r13\n"
+    "movq 24(%%rdi), %%r14\n"
+    /* (rax,rdx) = a0 * a0 */
+    "movq %%r11, %%rax\n"
+    "mulq %%r11\n"
+    /* Extract l0 */
+    "movq %%rax, 0(%%rsi)\n"
+    /* (r8,r9,r10) = (rdx,0) */
+    "movq %%rdx, %%r8\n"
+    "xorq %%r9, %%r9\n"
+    "xorq %%r10, %%r10\n"
+    /* (r8,r9,r10) += 2 * a0 * a1 */
+    "movq %%r11, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* Extract l1 */
+    "movq %%r8, 8(%%rsi)\n"
+    "xorq %%r8, %%r8\n"
+    /* (r9,r10,r8) += 2 * a0 * a2 */
+    "movq %%r11, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += a1 * a1 */
+    "movq %%r12, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* Extract l2 */
+    "movq %%r9, 16(%%rsi)\n"
+    "xorq %%r9, %%r9\n"
+    /* (r10,r8,r9) += 2 * a0 * a3 */
+    "movq %%r11, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += 2 * a1 * a2 */
+    "movq %%r12, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* Extract l3 */
+    "movq %%r10, 24(%%rsi)\n"
+    "xorq %%r10, %%r10\n"
+    /* (r8,r9,r10) += 2 * a1 * a3 */
+    "movq %%r12, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += a2 * a2 */
+    "movq %%r13, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* Extract l4 */
+    "movq %%r8, 32(%%rsi)\n"
+    "xorq %%r8, %%r8\n"
+    /* (r9,r10,r8) += 2 * a2 * a3 */
+    "movq %%r13, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* Extract l5 */
+    "movq %%r9, 40(%%rsi)\n"
+    /* (r10,r8) += a3 * a3 */
+    "movq %%r14, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    /* Extract l6 */
+    "movq %%r10, 48(%%rsi)\n"
+    /* Extract l7 */
+    "movq %%r8, 56(%%rsi)\n"
+    :
+    : "S"(l), "D"(a->d)
+    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc", "memory");
+#else
+    /* 160 bit accumulator. */
+    uint64_t c0 = 0, c1 = 0;
+    uint32_t c2 = 0;
+
+    /* l[0..7] = a[0..3] * a[0..3]. */
+    muladd_fast(a->d[0], a->d[0]);
+    extract_fast(l[0]);
+    muladd2(a->d[0], a->d[1]);
+    extract(l[1]);
+    muladd2(a->d[0], a->d[2]);
+    muladd(a->d[1], a->d[1]);
+    extract(l[2]);
+    muladd2(a->d[0], a->d[3]);
+    muladd2(a->d[1], a->d[2]);
+    extract(l[3]);
+    muladd2(a->d[1], a->d[3]);
+    muladd(a->d[2], a->d[2]);
+    extract(l[4]);
+    muladd2(a->d[2], a->d[3]);
+    extract(l[5]);
+    muladd_fast(a->d[3], a->d[3]);
+    extract_fast(l[6]);
+    VERIFY_CHECK(c1 == 0);
+    l[7] = c0;
+#endif
+}
+
+#undef sumadd
+#undef sumadd_fast
+#undef muladd
+#undef muladd_fast
+#undef muladd2
+#undef extract
+#undef extract_fast
+
+static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+    uint64_t wide[8]; /* unreduced 512-bit product of a and b */
+    secp256k1_scalar_mul_512(wide, a, b);
+    secp256k1_scalar_reduce_512(r, wide); /* fold the product back below the group order */
+}
+
+static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) {
+    uint64_t wide[8]; /* unreduced 512-bit square of a */
+    secp256k1_scalar_sqr_512(wide, a);
+    secp256k1_scalar_reduce_512(r, wide); /* fold the square back below the group order */
+}
+
+static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) { /* Split a into its low 128 bits (r1) and its high 128 bits (r2), so that r1 + r2*2^128 = a. */
+    r1->d[0] = a->d[0];
+    r1->d[1] = a->d[1];
+    r1->d[2] = 0; /* NOTE(review): if r1 aliased a, this would clear a->d[2] before it is copied into r2 below - presumably callers pass distinct objects; confirm */
+    r1->d[3] = 0;
+    r2->d[0] = a->d[2];
+    r2->d[1] = a->d[3];
+    r2->d[2] = 0;
+    r2->d[3] = 0;
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+    return !((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3])); /* XOR of equal limbs is zero; any surviving bit marks a difference */
+}
+
+SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b, unsigned int shift) { /* r = a * b / 2^shift rounded to nearest, computed on the unreduced product; requires 256 <= shift <= 512; branches on shift and on the rounding bit. */
+    uint64_t l[8]; /* full 512-bit product, least significant limb first */
+    unsigned int shiftlimbs; /* number of whole 64-bit limbs discarded */
+    unsigned int shiftlow; /* remaining right shift inside a limb (0..63) */
+    unsigned int shifthigh; /* matching left shift that pulls bits down from the next higher limb */
+    VERIFY_CHECK(shift >= 256 && shift <= 512); /* for shift > 512 the rounding-bit lookup below would index past l[7] */
+    secp256k1_scalar_mul_512(l, a, b);
+    shiftlimbs = shift >> 6;
+    shiftlow = shift & 0x3F;
+    shifthigh = 64 - shiftlow;
+    r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0; /* shiftlow == 0 is excluded because a left shift by 64 would be undefined */
+    r->d[1] = shift < 448 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[2] = shift < 384 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[3] = shift < 320 ? (l[3 + shiftlimbs] >> shiftlow) : 0;
+    if ((l[(shift - 1) >> 6] >> ((shift - 1) & 0x3f)) & 1) { /* round to nearest: the highest discarded bit decides whether to add one */
+        secp256k1_scalar_add_bit(r, 0);
+    }
+}
+
+#endif
diff --git a/secp256k1/scalar_8x32.h b/secp256k1/scalar_8x32.h
new file mode 100644
index 000000000..f17017e24
--- /dev/null
+++ b/secp256k1/scalar_8x32.h
@@ -0,0 +1,19 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_SCALAR_REPR_
+#define _SECP256K1_SCALAR_REPR_
+
+#include <stdint.h>
+
+/** A scalar modulo the group order of the secp256k1 curve. */
+typedef struct {
+    uint32_t d[8];
+} secp256k1_scalar_t;
+
+#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{(d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7)}}
+
+#endif
diff --git a/secp256k1/scalar_8x32_impl.h b/secp256k1/scalar_8x32_impl.h
new file mode 100644
index 000000000..22b31d411
--- /dev/null
+++ b/secp256k1/scalar_8x32_impl.h
@@ -0,0 +1,681 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_SCALAR_REPR_IMPL_H_
+#define _SECP256K1_SCALAR_REPR_IMPL_H_
+
+/* Limbs of the secp256k1 order. */
+#define SECP256K1_N_0 ((uint32_t)0xD0364141UL)
+#define SECP256K1_N_1 ((uint32_t)0xBFD25E8CUL)
+#define SECP256K1_N_2 ((uint32_t)0xAF48A03BUL)
+#define SECP256K1_N_3 ((uint32_t)0xBAAEDCE6UL)
+#define SECP256K1_N_4 ((uint32_t)0xFFFFFFFEUL)
+#define SECP256K1_N_5 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_6 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_7 ((uint32_t)0xFFFFFFFFUL)
+
+/* Limbs of 2^256 minus the secp256k1 order. */
+#define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1)
+#define SECP256K1_N_C_1 (~SECP256K1_N_1)
+#define SECP256K1_N_C_2 (~SECP256K1_N_2)
+#define SECP256K1_N_C_3 (~SECP256K1_N_3)
+#define SECP256K1_N_C_4 (1)
+
+/* Limbs of half the secp256k1 order. */
+#define SECP256K1_N_H_0 ((uint32_t)0x681B20A0UL)
+#define SECP256K1_N_H_1 ((uint32_t)0xDFE92F46UL)
+#define SECP256K1_N_H_2 ((uint32_t)0x57A4501DUL)
+#define SECP256K1_N_H_3 ((uint32_t)0x5D576E73UL)
+#define SECP256K1_N_H_4 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_H_5 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_H_6 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_H_7 ((uint32_t)0x7FFFFFFFUL)
+
+SECP256K1_INLINE static void secp256k1_scalar_clear(secp256k1_scalar_t *r) {
+    r->d[0] = 0;
+    r->d[1] = 0;
+    r->d[2] = 0;
+    r->d[3] = 0;
+    r->d[4] = 0;
+    r->d[5] = 0;
+    r->d[6] = 0;
+    r->d[7] = 0;
+}
+
+SECP256K1_INLINE static void secp256k1_scalar_set_int(secp256k1_scalar_t *r, unsigned int v) {
+    r->d[0] = v;
+    r->d[1] = 0;
+    r->d[2] = 0;
+    r->d[3] = 0;
+    r->d[4] = 0;
+    r->d[5] = 0;
+    r->d[6] = 0;
+    r->d[7] = 0;
+}
+
+SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count) { /* Return `count` bits of a starting at bit `offset`; the range must lie within a single 32-bit limb. */
+    VERIFY_CHECK((offset + count - 1) >> 5 == offset >> 5); /* first and last requested bit share a limb */
+    return (a->d[offset >> 5] >> (offset & 0x1F)) & ((((uint32_t)1) << count) - 1); /* unsigned mask, as in secp256k1_scalar_get_bits_var: (1 << 31) - 1 would overflow a signed int */
+}
+
+SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits_var(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count) {
+    VERIFY_CHECK(count < 32);
+    VERIFY_CHECK(offset + count <= 256);
+    if ((offset + count - 1) >> 5 == offset >> 5) {
+        return secp256k1_scalar_get_bits(a, offset, count);
+    } else {
+        VERIFY_CHECK((offset >> 5) + 1 < 8);
+        return ((a->d[offset >> 5] >> (offset & 0x1F)) | (a->d[(offset >> 5) + 1] << (32 - (offset & 0x1F)))) & ((((uint32_t)1) << count) - 1);
+    }
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar_t *a) { /* Return 1 if a >= N (the SECP256K1_N_* limbs), 0 otherwise. */
+    int yes = 0; /* latched to 1 once a more significant limb proves a > N */
+    int no = 0; /* latched to 1 once a more significant limb proves a < N */
+    no |= (a->d[7] < SECP256K1_N_7); /* No need for a > check: N_7 is 0xFFFFFFFF. */
+    no |= (a->d[6] < SECP256K1_N_6); /* No need for a > check: N_6 is 0xFFFFFFFF. */
+    no |= (a->d[5] < SECP256K1_N_5); /* No need for a > check: N_5 is 0xFFFFFFFF. */
+    no |= (a->d[4] < SECP256K1_N_4);
+    yes |= (a->d[4] > SECP256K1_N_4) & ~no;
+    no |= (a->d[3] < SECP256K1_N_3) & ~yes;
+    yes |= (a->d[3] > SECP256K1_N_3) & ~no;
+    no |= (a->d[2] < SECP256K1_N_2) & ~yes;
+    yes |= (a->d[2] > SECP256K1_N_2) & ~no;
+    no |= (a->d[1] < SECP256K1_N_1) & ~yes;
+    yes |= (a->d[1] > SECP256K1_N_1) & ~no;
+    yes |= (a->d[0] >= SECP256K1_N_0) & ~no; /* >= on the lowest limb: a == N counts as overflow too */
+    return yes;
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, uint32_t overflow) { /* Add overflow * (2^256 - N) to r in place, dropping the carry out of limb 7; returns overflow. */
+    uint64_t t; /* running sum; its upper 32 bits carry into the next limb */
+    VERIFY_CHECK(overflow <= 1);
+    t = (uint64_t)r->d[0] + overflow * SECP256K1_N_C_0;
+    r->d[0] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[1] + overflow * SECP256K1_N_C_1;
+    r->d[1] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[2] + overflow * SECP256K1_N_C_2;
+    r->d[2] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[3] + overflow * SECP256K1_N_C_3;
+    r->d[3] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[4] + overflow * SECP256K1_N_C_4;
+    r->d[4] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[5]; /* SECP256K1_N_C has only five limbs, so limbs 5..7 just propagate the carry */
+    r->d[5] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[6];
+    r->d[6] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[7];
+    r->d[7] = t & 0xFFFFFFFFUL; /* carry out of the top limb is discarded (arithmetic modulo 2^256) */
+    return overflow;
+}
+
+static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) { /* r = a + b, reduced once via secp256k1_scalar_reduce; returns 1 if that reduction was applied, else 0. */
+    int overflow;
+    uint64_t t = (uint64_t)a->d[0] + b->d[0]; /* limb-wise add; the upper 32 bits of t carry into the next limb */
+    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[1] + b->d[1];
+    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[2] + b->d[2];
+    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[3] + b->d[3];
+    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[4] + b->d[4];
+    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[5] + b->d[5];
+    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[6] + b->d[6];
+    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[7] + b->d[7];
+    r->d[7] = t & 0xFFFFFFFFULL; t >>= 32; /* t is now the carry out of bit 255 */
+    overflow = t + secp256k1_scalar_check_overflow(r); /* reduce if the sum carried out of 256 bits or the low 256 bits are >= N */
+    VERIFY_CHECK(overflow == 0 || overflow == 1);
+    secp256k1_scalar_reduce(r, overflow);
+    return overflow;
+}
+
+static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) { /* r += 2^bit in place, with no modular reduction; VERIFY builds assert the result did not overflow. */
+    uint64_t t;
+    VERIFY_CHECK(bit < 256);
+    t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F)); /* (bit >> 5) == i is 1 only for the limb holding the bit, so every limb runs the same code */
+    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[1] + (((uint32_t)((bit >> 5) == 1)) << (bit & 0x1F));
+    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[2] + (((uint32_t)((bit >> 5) == 2)) << (bit & 0x1F));
+    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[3] + (((uint32_t)((bit >> 5) == 3)) << (bit & 0x1F));
+    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[4] + (((uint32_t)((bit >> 5) == 4)) << (bit & 0x1F));
+    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[5] + (((uint32_t)((bit >> 5) == 5)) << (bit & 0x1F));
+    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[6] + (((uint32_t)((bit >> 5) == 6)) << (bit & 0x1F));
+    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[7] + (((uint32_t)((bit >> 5) == 7)) << (bit & 0x1F));
+    r->d[7] = t & 0xFFFFFFFFULL; /* t is not shifted here, so its upper half still holds any carry out of limb 7 */
+#ifdef VERIFY
+    VERIFY_CHECK((t >> 32) == 0); /* no carry out of the top limb */
+    VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0); /* result is still below N */
+#endif
+}
+
+static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) {
+    int over;
+    r->d[0] = (uint32_t)b32[31] | (uint32_t)b32[30] << 8 | (uint32_t)b32[29] << 16 | (uint32_t)b32[28] << 24;
+    r->d[1] = (uint32_t)b32[27] | (uint32_t)b32[26] << 8 | (uint32_t)b32[25] << 16 | (uint32_t)b32[24] << 24;
+    r->d[2] = (uint32_t)b32[23] | (uint32_t)b32[22] << 8 | (uint32_t)b32[21] << 16 | (uint32_t)b32[20] << 24;
+    r->d[3] = (uint32_t)b32[19] | (uint32_t)b32[18] << 8 | (uint32_t)b32[17] << 16 | (uint32_t)b32[16] << 24;
+    r->d[4] = (uint32_t)b32[15] | (uint32_t)b32[14] << 8 | (uint32_t)b32[13] << 16 | (uint32_t)b32[12] << 24;
+    r->d[5] = (uint32_t)b32[11] | (uint32_t)b32[10] << 8 | (uint32_t)b32[9] << 16 | (uint32_t)b32[8] << 24;
+    r->d[6] = (uint32_t)b32[7] | (uint32_t)b32[6] << 8 | (uint32_t)b32[5] << 16 | (uint32_t)b32[4] << 24;
+    r->d[7] = (uint32_t)b32[3] | (uint32_t)b32[2] << 8 | (uint32_t)b32[1] << 16 | (uint32_t)b32[0] << 24;
+    over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
+    if (overflow) {
+        *overflow = over;
+    }
+}
+
+static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_t* a) {
+    bin[0] = a->d[7] >> 24; bin[1] = a->d[7] >> 16; bin[2] = a->d[7] >> 8; bin[3] = a->d[7];
+    bin[4] = a->d[6] >> 24; bin[5] = a->d[6] >> 16; bin[6] = a->d[6] >> 8; bin[7] = a->d[6];
+    bin[8] = a->d[5] >> 24; bin[9] = a->d[5] >> 16; bin[10] = a->d[5] >> 8; bin[11] = a->d[5];
+    bin[12] = a->d[4] >> 24; bin[13] = a->d[4] >> 16; bin[14] = a->d[4] >> 8; bin[15] = a->d[4];
+    bin[16] = a->d[3] >> 24; bin[17] = a->d[3] >> 16; bin[18] = a->d[3] >> 8; bin[19] = a->d[3];
+    bin[20] = a->d[2] >> 24; bin[21] = a->d[2] >> 16; bin[22] = a->d[2] >> 8; bin[23] = a->d[2];
+    bin[24] = a->d[1] >> 24; bin[25] = a->d[1] >> 16; bin[26] = a->d[1] >> 8; bin[27] = a->d[1];
+    bin[28] = a->d[0] >> 24; bin[29] = a->d[0] >> 16; bin[30] = a->d[0] >> 8; bin[31] = a->d[0];
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar_t *a) {
+    return (a->d[0] | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0;
+}
+
+static void secp256k1_scalar_negate(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) { /* r = N - a for nonzero a, and r = 0 for a == 0. */
+    uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(a) == 0); /* all-ones mask if a != 0, otherwise 0 */
+    uint64_t t = (uint64_t)(~a->d[0]) + SECP256K1_N_0 + 1; /* ~a + 1 is -a modulo 2^256; adding N limb by limb yields N - a */
+    r->d[0] = t & nonzero; t >>= 32; /* the mask keeps the low 32 bits (or zeroes the limb when a == 0); the carry stays in t */
+    t += (uint64_t)(~a->d[1]) + SECP256K1_N_1;
+    r->d[1] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[2]) + SECP256K1_N_2;
+    r->d[2] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[3]) + SECP256K1_N_3;
+    r->d[3] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[4]) + SECP256K1_N_4;
+    r->d[4] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[5]) + SECP256K1_N_5;
+    r->d[5] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[6]) + SECP256K1_N_6;
+    r->d[6] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[7]) + SECP256K1_N_7;
+    r->d[7] = t & nonzero; /* carry out of the top limb is dropped */
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar_t *a) {
+    return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0;
+}
+
+static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) { /* Return 1 if a is strictly greater than the SECP256K1_N_H_* value (half the group order), 0 otherwise. */
+    int yes = 0; /* latched to 1 once a more significant limb proves a > N_H */
+    int no = 0; /* latched to 1 once a more significant limb proves a < N_H */
+    no |= (a->d[7] < SECP256K1_N_H_7);
+    yes |= (a->d[7] > SECP256K1_N_H_7) & ~no;
+    no |= (a->d[6] < SECP256K1_N_H_6) & ~yes; /* No need for a > check: N_H_6 is 0xFFFFFFFF. */
+    no |= (a->d[5] < SECP256K1_N_H_5) & ~yes; /* No need for a > check: N_H_5 is 0xFFFFFFFF. */
+    no |= (a->d[4] < SECP256K1_N_H_4) & ~yes; /* No need for a > check: N_H_4 is 0xFFFFFFFF. */
+    no |= (a->d[3] < SECP256K1_N_H_3) & ~yes;
+    yes |= (a->d[3] > SECP256K1_N_H_3) & ~no;
+    no |= (a->d[2] < SECP256K1_N_H_2) & ~yes;
+    yes |= (a->d[2] > SECP256K1_N_H_2) & ~no;
+    no |= (a->d[1] < SECP256K1_N_H_1) & ~yes;
+    yes |= (a->d[1] > SECP256K1_N_H_1) & ~no;
+    yes |= (a->d[0] > SECP256K1_N_H_0) & ~no; /* strict >: a equal to N_H is not "high" */
+    return yes;
+}
+
+/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */
+
+/** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
+#define muladd(a,b) { \
+    uint32_t tl, th; \
+    { \
+        uint64_t t = (uint64_t)a * b; \
+        th = t >> 32;         /* at most 0xFFFFFFFE */ \
+        tl = t; \
+    } \
+    c0 += tl;                 /* overflow is handled on the next line */ \
+    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFF */ \
+    c1 += th;                 /* overflow is handled on the next line */ \
+    c2 += (c1 < th) ? 1 : 0;  /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
+}
+
+/** Add a*b to the number defined by (c0,c1). c1 must never overflow. */
+#define muladd_fast(a,b) { \
+    uint32_t tl, th; \
+    { \
+        uint64_t t = (uint64_t)a * b; \
+        th = t >> 32;         /* at most 0xFFFFFFFE */ \
+        tl = t; \
+    } \
+    c0 += tl;                 /* overflow is handled on the next line */ \
+    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFF */ \
+    c1 += th;                 /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK(c1 >= th); \
+}
+
+/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
+#define muladd2(a,b) { \
+    uint32_t tl, th, th2, tl2; \
+    { \
+        uint64_t t = (uint64_t)a * b; \
+        th = t >> 32;               /* at most 0xFFFFFFFE */ \
+        tl = t; \
+    } \
+    th2 = th + th;                  /* at most 0xFFFFFFFE (in case th was 0x7FFFFFFF) */ \
+    c2 += (th2 < th) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
+    VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
+    tl2 = tl + tl;                  /* at most 0xFFFFFFFE (in case the lowest 31 bits of tl were 0x7FFFFFFF) */ \
+    th2 += (tl2 < tl) ? 1 : 0;      /* at most 0xFFFFFFFF */ \
+    c0 += tl2;                      /* overflow is handled on the next line */ \
+    th2 += (c0 < tl2) ? 1 : 0;      /* second overflow is handled on the next line */ \
+    c2 += (c0 < tl2) & (th2 == 0);  /* never overflows by contract (verified the next line) */ \
+    VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
+    c1 += th2;                      /* overflow is handled on the next line */ \
+    c2 += (c1 < th2) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
+    VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
+}
+
+/** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
+#define sumadd(a) { \
+    unsigned int over; \
+    c0 += (a);                  /* overflow is handled on the next line */ \
+    over = (c0 < (a)) ? 1 : 0; \
+    c1 += over;                 /* overflow is handled on the next line */ \
+    c2 += (c1 < over) ? 1 : 0;  /* never overflows by contract */ \
+}
+
+/** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero. */
+#define sumadd_fast(a) { \
+    c0 += (a);                 /* overflow is handled on the next line */ \
+    c1 += (c0 < (a)) ? 1 : 0;  /* never overflows by contract (verified the next line) */ \
+    VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \
+    VERIFY_CHECK(c2 == 0); \
+}
+
+/** Extract the lowest 32 bits of (c0,c1,c2) into n, and right shift the number 32 bits (c1 becomes c0, c2 becomes c1, c2 is cleared). */
+#define extract(n) { \
+    (n) = c0; \
+    c0 = c1; \
+    c1 = c2; \
+    c2 = 0; \
+}
+
+/** Extract the lowest 32 bits of (c0,c1,c2) into n, and right shift the number 32 bits (c1 becomes c0, c1 is cleared). c2 is required to be zero. */
+#define extract_fast(n) { \
+    (n) = c0; \
+    c0 = c1; \
+    c1 = 0; \
+    VERIFY_CHECK(c2 == 0); \
+}
+
+static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint32_t *l) {
+    uint64_t c;
+    uint32_t n0 = l[8], n1 = l[9], n2 = l[10], n3 = l[11], n4 = l[12], n5 = l[13], n6 = l[14], n7 = l[15];
+    uint32_t m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12;
+    uint32_t p0, p1, p2, p3, p4, p5, p6, p7, p8;
+
+    /* 96 bit accumulator. */
+    uint32_t c0, c1, c2;
+
+    /* Reduce 512 bits into 385. */
+    /* m[0..12] = l[0..7] + n[0..7] * SECP256K1_N_C. */
+    c0 = l[0]; c1 = 0; c2 = 0;
+    muladd_fast(n0, SECP256K1_N_C_0);
+    extract_fast(m0);
+    sumadd_fast(l[1]);
+    muladd(n1, SECP256K1_N_C_0);
+    muladd(n0, SECP256K1_N_C_1);
+    extract(m1);
+    sumadd(l[2]);
+    muladd(n2, SECP256K1_N_C_0);
+    muladd(n1, SECP256K1_N_C_1);
+    muladd(n0, SECP256K1_N_C_2);
+    extract(m2);
+    sumadd(l[3]);
+    muladd(n3, SECP256K1_N_C_0);
+    muladd(n2, SECP256K1_N_C_1);
+    muladd(n1, SECP256K1_N_C_2);
+    muladd(n0, SECP256K1_N_C_3);
+    extract(m3);
+    sumadd(l[4]);
+    muladd(n4, SECP256K1_N_C_0);
+    muladd(n3, SECP256K1_N_C_1);
+    muladd(n2, SECP256K1_N_C_2);
+    muladd(n1, SECP256K1_N_C_3);
+    sumadd(n0);
+    extract(m4);
+    sumadd(l[5]);
+    muladd(n5, SECP256K1_N_C_0);
+    muladd(n4, SECP256K1_N_C_1);
+    muladd(n3, SECP256K1_N_C_2);
+    muladd(n2, SECP256K1_N_C_3);
+    sumadd(n1);
+    extract(m5);
+    sumadd(l[6]);
+    muladd(n6, SECP256K1_N_C_0);
+    muladd(n5, SECP256K1_N_C_1);
+    muladd(n4, SECP256K1_N_C_2);
+    muladd(n3, SECP256K1_N_C_3);
+    sumadd(n2);
+    extract(m6);
+    sumadd(l[7]);
+    muladd(n7, SECP256K1_N_C_0);
+    muladd(n6, SECP256K1_N_C_1);
+    muladd(n5, SECP256K1_N_C_2);
+    muladd(n4, SECP256K1_N_C_3);
+    sumadd(n3);
+    extract(m7);
+    muladd(n7, SECP256K1_N_C_1);
+    muladd(n6, SECP256K1_N_C_2);
+    muladd(n5, SECP256K1_N_C_3);
+    sumadd(n4);
+    extract(m8);
+    muladd(n7, SECP256K1_N_C_2);
+    muladd(n6, SECP256K1_N_C_3);
+    sumadd(n5);
+    extract(m9);
+    muladd(n7, SECP256K1_N_C_3);
+    sumadd(n6);
+    extract(m10);
+    sumadd_fast(n7);
+    extract_fast(m11);
+    VERIFY_CHECK(c0 <= 1);
+    m12 = c0;
+
+    /* Reduce 385 bits into 258. */
+    /* p[0..8] = m[0..7] + m[8..12] * SECP256K1_N_C. */
+    c0 = m0; c1 = 0; c2 = 0;
+    muladd_fast(m8, SECP256K1_N_C_0);
+    extract_fast(p0);
+    sumadd_fast(m1);
+    muladd(m9, SECP256K1_N_C_0);
+    muladd(m8, SECP256K1_N_C_1);
+    extract(p1);
+    sumadd(m2);
+    muladd(m10, SECP256K1_N_C_0);
+    muladd(m9, SECP256K1_N_C_1);
+    muladd(m8, SECP256K1_N_C_2);
+    extract(p2);
+    sumadd(m3);
+    muladd(m11, SECP256K1_N_C_0);
+    muladd(m10, SECP256K1_N_C_1);
+    muladd(m9, SECP256K1_N_C_2);
+    muladd(m8, SECP256K1_N_C_3);
+    extract(p3);
+    sumadd(m4);
+    muladd(m12, SECP256K1_N_C_0);
+    muladd(m11, SECP256K1_N_C_1);
+    muladd(m10, SECP256K1_N_C_2);
+    muladd(m9, SECP256K1_N_C_3);
+    sumadd(m8);
+    extract(p4);
+    sumadd(m5);
+    muladd(m12, SECP256K1_N_C_1);
+    muladd(m11, SECP256K1_N_C_2);
+    muladd(m10, SECP256K1_N_C_3);
+    sumadd(m9);
+    extract(p5);
+    sumadd(m6);
+    muladd(m12, SECP256K1_N_C_2);
+    muladd(m11, SECP256K1_N_C_3);
+    sumadd(m10);
+    extract(p6);
+    sumadd_fast(m7);
+    muladd_fast(m12, SECP256K1_N_C_3);
+    sumadd_fast(m11);
+    extract_fast(p7);
+    p8 = c0 + m12;
+    VERIFY_CHECK(p8 <= 2);
+
+    /* Reduce 258 bits into 256. */
+    /* r[0..7] = p[0..7] + p[8] * SECP256K1_N_C. */
+    c = p0 + (uint64_t)SECP256K1_N_C_0 * p8;
+    r->d[0] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p1 + (uint64_t)SECP256K1_N_C_1 * p8;
+    r->d[1] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p2 + (uint64_t)SECP256K1_N_C_2 * p8;
+    r->d[2] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p3 + (uint64_t)SECP256K1_N_C_3 * p8;
+    r->d[3] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p4 + (uint64_t)p8;
+    r->d[4] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p5;
+    r->d[5] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p6;
+    r->d[6] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p7;
+    r->d[7] = c & 0xFFFFFFFFUL; c >>= 32;
+
+    /* Final reduction of r. */
+    secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
+}
+
+static void secp256k1_scalar_mul_512(uint32_t *l, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+    /* 96 bit accumulator. */
+    uint32_t c0 = 0, c1 = 0, c2 = 0;
+
+    /* l[0..15] = a[0..7] * b[0..7]. */
+    muladd_fast(a->d[0], b->d[0]);
+    extract_fast(l[0]);
+    muladd(a->d[0], b->d[1]);
+    muladd(a->d[1], b->d[0]);
+    extract(l[1]);
+    muladd(a->d[0], b->d[2]);
+    muladd(a->d[1], b->d[1]);
+    muladd(a->d[2], b->d[0]);
+    extract(l[2]);
+    muladd(a->d[0], b->d[3]);
+    muladd(a->d[1], b->d[2]);
+    muladd(a->d[2], b->d[1]);
+    muladd(a->d[3], b->d[0]);
+    extract(l[3]);
+    muladd(a->d[0], b->d[4]);
+    muladd(a->d[1], b->d[3]);
+    muladd(a->d[2], b->d[2]);
+    muladd(a->d[3], b->d[1]);
+    muladd(a->d[4], b->d[0]);
+    extract(l[4]);
+    muladd(a->d[0], b->d[5]);
+    muladd(a->d[1], b->d[4]);
+    muladd(a->d[2], b->d[3]);
+    muladd(a->d[3], b->d[2]);
+    muladd(a->d[4], b->d[1]);
+    muladd(a->d[5], b->d[0]);
+    extract(l[5]);
+    muladd(a->d[0], b->d[6]);
+    muladd(a->d[1], b->d[5]);
+    muladd(a->d[2], b->d[4]);
+    muladd(a->d[3], b->d[3]);
+    muladd(a->d[4], b->d[2]);
+    muladd(a->d[5], b->d[1]);
+    muladd(a->d[6], b->d[0]);
+    extract(l[6]);
+    muladd(a->d[0], b->d[7]);
+    muladd(a->d[1], b->d[6]);
+    muladd(a->d[2], b->d[5]);
+    muladd(a->d[3], b->d[4]);
+    muladd(a->d[4], b->d[3]);
+    muladd(a->d[5], b->d[2]);
+    muladd(a->d[6], b->d[1]);
+    muladd(a->d[7], b->d[0]);
+    extract(l[7]);
+    muladd(a->d[1], b->d[7]);
+    muladd(a->d[2], b->d[6]);
+    muladd(a->d[3], b->d[5]);
+    muladd(a->d[4], b->d[4]);
+    muladd(a->d[5], b->d[3]);
+    muladd(a->d[6], b->d[2]);
+    muladd(a->d[7], b->d[1]);
+    extract(l[8]);
+    muladd(a->d[2], b->d[7]);
+    muladd(a->d[3], b->d[6]);
+    muladd(a->d[4], b->d[5]);
+    muladd(a->d[5], b->d[4]);
+    muladd(a->d[6], b->d[3]);
+    muladd(a->d[7], b->d[2]);
+    extract(l[9]);
+    muladd(a->d[3], b->d[7]);
+    muladd(a->d[4], b->d[6]);
+    muladd(a->d[5], b->d[5]);
+    muladd(a->d[6], b->d[4]);
+    muladd(a->d[7], b->d[3]);
+    extract(l[10]);
+    muladd(a->d[4], b->d[7]);
+    muladd(a->d[5], b->d[6]);
+    muladd(a->d[6], b->d[5]);
+    muladd(a->d[7], b->d[4]);
+    extract(l[11]);
+    muladd(a->d[5], b->d[7]);
+    muladd(a->d[6], b->d[6]);
+    muladd(a->d[7], b->d[5]);
+    extract(l[12]);
+    muladd(a->d[6], b->d[7]);
+    muladd(a->d[7], b->d[6]);
+    extract(l[13]);
+    muladd_fast(a->d[7], b->d[7]);
+    extract_fast(l[14]);
+    VERIFY_CHECK(c1 == 0);
+    l[15] = c0;
+}
+
+static void secp256k1_scalar_sqr_512(uint32_t *l, const secp256k1_scalar_t *a) {
+    /* 96 bit accumulator. */
+    uint32_t c0 = 0, c1 = 0, c2 = 0;
+
+    /* l[0..15] = a[0..7]^2. */
+    muladd_fast(a->d[0], a->d[0]);
+    extract_fast(l[0]);
+    muladd2(a->d[0], a->d[1]);
+    extract(l[1]);
+    muladd2(a->d[0], a->d[2]);
+    muladd(a->d[1], a->d[1]);
+    extract(l[2]);
+    muladd2(a->d[0], a->d[3]);
+    muladd2(a->d[1], a->d[2]);
+    extract(l[3]);
+    muladd2(a->d[0], a->d[4]);
+    muladd2(a->d[1], a->d[3]);
+    muladd(a->d[2], a->d[2]);
+    extract(l[4]);
+    muladd2(a->d[0], a->d[5]);
+    muladd2(a->d[1], a->d[4]);
+    muladd2(a->d[2], a->d[3]);
+    extract(l[5]);
+    muladd2(a->d[0], a->d[6]);
+    muladd2(a->d[1], a->d[5]);
+    muladd2(a->d[2], a->d[4]);
+    muladd(a->d[3], a->d[3]);
+    extract(l[6]);
+    muladd2(a->d[0], a->d[7]);
+    muladd2(a->d[1], a->d[6]);
+    muladd2(a->d[2], a->d[5]);
+    muladd2(a->d[3], a->d[4]);
+    extract(l[7]);
+    muladd2(a->d[1], a->d[7]);
+    muladd2(a->d[2], a->d[6]);
+    muladd2(a->d[3], a->d[5]);
+    muladd(a->d[4], a->d[4]);
+    extract(l[8]);
+    muladd2(a->d[2], a->d[7]);
+    muladd2(a->d[3], a->d[6]);
+    muladd2(a->d[4], a->d[5]);
+    extract(l[9]);
+    muladd2(a->d[3], a->d[7]);
+    muladd2(a->d[4], a->d[6]);
+    muladd(a->d[5], a->d[5]);
+    extract(l[10]);
+    muladd2(a->d[4], a->d[7]);
+    muladd2(a->d[5], a->d[6]);
+    extract(l[11]);
+    muladd2(a->d[5], a->d[7]);
+    muladd(a->d[6], a->d[6]);
+    extract(l[12]);
+    muladd2(a->d[6], a->d[7]);
+    extract(l[13]);
+    muladd_fast(a->d[7], a->d[7]);
+    extract_fast(l[14]);
+    VERIFY_CHECK(c1 == 0);
+    l[15] = c0;
+}
+
+#undef sumadd
+#undef sumadd_fast
+#undef muladd
+#undef muladd_fast
+#undef muladd2
+#undef extract
+#undef extract_fast
+
+static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+    uint32_t l[16];
+    secp256k1_scalar_mul_512(l, a, b);
+    secp256k1_scalar_reduce_512(r, l);
+}
+
+static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) {
+    uint32_t l[16];
+    secp256k1_scalar_sqr_512(l, a);
+    secp256k1_scalar_reduce_512(r, l);
+}
+
+#ifdef USE_ENDOMORPHISM
+static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) {
+    r1->d[0] = a->d[0];
+    r1->d[1] = a->d[1];
+    r1->d[2] = a->d[2];
+    r1->d[3] = a->d[3];
+    r1->d[4] = 0;
+    r1->d[5] = 0;
+    r1->d[6] = 0;
+    r1->d[7] = 0;
+    r2->d[0] = a->d[4];
+    r2->d[1] = a->d[5];
+    r2->d[2] = a->d[6];
+    r2->d[3] = a->d[7];
+    r2->d[4] = 0;
+    r2->d[5] = 0;
+    r2->d[6] = 0;
+    r2->d[7] = 0;
+}
+#endif
+
+SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+    return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3]) | (a->d[4] ^ b->d[4]) | (a->d[5] ^ b->d[5]) | (a->d[6] ^ b->d[6]) | (a->d[7] ^ b->d[7])) == 0;
+}
+
+SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b, unsigned int shift) { /* r = (a * b) >> shift on the full 512-bit product, plus one if the highest discarded bit is set */
+    uint32_t l[16];
+    unsigned int shiftlimbs;
+    unsigned int shiftlow;
+    unsigned int shifthigh;
+    VERIFY_CHECK(shift >= 256); /* NOTE(review): no upper bound is checked; shift > 512 would index past l[] in the rounding step below */
+    secp256k1_scalar_mul_512(l, a, b);
+    shiftlimbs = shift >> 5; /* whole 32-bit limbs to skip */
+    shiftlow = shift & 0x1F; /* remaining shift inside a limb */
+    shifthigh = 32 - shiftlow; /* equals 32 when shiftlow == 0; the "&& shiftlow" guards below skip that full-width shift */
+    r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 480 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[1] = shift < 480 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[2] = shift < 448 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 416 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[3] = shift < 416 ? (l[3 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[4 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[4] = shift < 384 ? (l[4 + shiftlimbs] >> shiftlow | (shift < 352 && shiftlow ? (l[5 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[5] = shift < 352 ? (l[5 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[6 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[6] = shift < 320 ? (l[6 + shiftlimbs] >> shiftlow | (shift < 288 && shiftlow ? (l[7 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[7] = shift < 288 ? (l[7 + shiftlimbs] >> shiftlow)  : 0;
+    if ((l[(shift - 1) >> 5] >> ((shift - 1) & 0x1f)) & 1) { /* bit (shift - 1) of the product is the highest discarded bit: round up */
+        secp256k1_scalar_add_bit(r, 0);
+    }
+}
+
+#endif
diff --git a/secp256k1/scalar_impl.h b/secp256k1/scalar_impl.h
new file mode 100644
index 000000000..33824983e
--- /dev/null
+++ b/secp256k1/scalar_impl.h
@@ -0,0 +1,327 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_SCALAR_IMPL_H_
+#define _SECP256K1_SCALAR_IMPL_H_
+
+#include <string.h>
+
+#include "group.h"
+#include "scalar.h"
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#if defined(USE_SCALAR_4X64)
+#include "scalar_4x64_impl.h"
+#elif defined(USE_SCALAR_8X32)
+#include "scalar_8x32_impl.h"
+#else
+#error "Please select scalar implementation"
+#endif
+
+#ifndef USE_NUM_NONE
+static void secp256k1_scalar_get_num(secp256k1_num_t *r, const secp256k1_scalar_t *a) { /* converts scalar a into num r by way of its 32-byte serialization */
+    unsigned char c[32]; /* scratch buffer for the serialized scalar */
+    secp256k1_scalar_get_b32(c, a);
+    secp256k1_num_set_bin(r, c, 32);
+}
+
+/** secp256k1 curve order, see secp256k1_ecdsa_const_order_as_fe in ecdsa_impl.h */
+static void secp256k1_scalar_order_get_num(secp256k1_num_t *r) { /* loads the constant order[] below into r */
+    static const unsigned char order[32] = { /* 32 bytes, written most-significant byte first */
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,
+        0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,
+        0xBF,0xD2,0x5E,0x8C,0xD0,0x36,0x41,0x41
+    };
+    secp256k1_num_set_bin(r, order, 32);
+}
+#endif
+
+static void secp256k1_scalar_inverse(secp256k1_scalar_t *r, const secp256k1_scalar_t *x) { /* raises x to a fixed exponent with a hard-coded square/multiply chain (presumably order - 2, giving the modular inverse - confirm); the sequence of operations does not depend on the value of x */
+    secp256k1_scalar_t *t; /* accumulator; aliases x127 in the second phase */
+    int i;
+    /* First compute x ^ (2^N - 1) for some values of N. */
+    secp256k1_scalar_t x2, x3, x4, x6, x7, x8, x15, x30, x60, x120, x127; /* xN holds x^(2^N - 1), i.e. an exponent of N one-bits */
+
+    secp256k1_scalar_sqr(&x2,  x);
+    secp256k1_scalar_mul(&x2, &x2,  x); /* x2 = x^3 */
+
+    secp256k1_scalar_sqr(&x3, &x2);
+    secp256k1_scalar_mul(&x3, &x3,  x); /* x3 = x^7 */
+
+    secp256k1_scalar_sqr(&x4, &x3);
+    secp256k1_scalar_mul(&x4, &x4,  x); /* x4 = x^15 */
+
+    secp256k1_scalar_sqr(&x6, &x4);
+    secp256k1_scalar_sqr(&x6, &x6);
+    secp256k1_scalar_mul(&x6, &x6, &x2); /* x6 = (x4)^4 * x2 */
+
+    secp256k1_scalar_sqr(&x7, &x6);
+    secp256k1_scalar_mul(&x7, &x7,  x);
+
+    secp256k1_scalar_sqr(&x8, &x7);
+    secp256k1_scalar_mul(&x8, &x8,  x);
+
+    secp256k1_scalar_sqr(&x15, &x8); /* first of 7 squarings of x8 */
+    for (i = 0; i < 6; i++) {
+        secp256k1_scalar_sqr(&x15, &x15);
+    }
+    secp256k1_scalar_mul(&x15, &x15, &x7); /* 8 one-bits shifted by 7, then 7 more one-bits */
+
+    secp256k1_scalar_sqr(&x30, &x15); /* first of 15 squarings of x15 */
+    for (i = 0; i < 14; i++) {
+        secp256k1_scalar_sqr(&x30, &x30);
+    }
+    secp256k1_scalar_mul(&x30, &x30, &x15);
+
+    secp256k1_scalar_sqr(&x60, &x30); /* first of 30 squarings of x30 */
+    for (i = 0; i < 29; i++) {
+        secp256k1_scalar_sqr(&x60, &x60);
+    }
+    secp256k1_scalar_mul(&x60, &x60, &x30);
+
+    secp256k1_scalar_sqr(&x120, &x60); /* first of 60 squarings of x60 */
+    for (i = 0; i < 59; i++) {
+        secp256k1_scalar_sqr(&x120, &x120);
+    }
+    secp256k1_scalar_mul(&x120, &x120, &x60);
+
+    secp256k1_scalar_sqr(&x127, &x120); /* first of 7 squarings of x120 */
+    for (i = 0; i < 6; i++) {
+        secp256k1_scalar_sqr(&x127, &x127);
+    }
+    secp256k1_scalar_mul(&x127, &x127, &x7);
+
+    /* Then accumulate the final result (t starts at x127). */
+    t = &x127; /* from here on x127 is overwritten in place; each loop comment shows the zero bits that precede the one-bits named on the following multiply */
+    for (i = 0; i < 2; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 4; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (i = 0; i < 2; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 2; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 2; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 4; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (i = 0; i < 3; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x2); /* 11 */
+    for (i = 0; i < 4; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (i = 0; i < 5; i++) { /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (i = 0; i < 4; i++) { /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x2); /* 11 */
+    for (i = 0; i < 2; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 2; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 5; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x4); /* 1111 */
+    for (i = 0; i < 2; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 3; i++) { /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 4; i++) { /* 000 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 2; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 10; i++) { /* 0000000 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (i = 0; i < 4; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (i = 0; i < 9; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x8); /* 11111111 */
+    for (i = 0; i < 2; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 3; i++) { /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 3; i++) { /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 5; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x4); /* 1111 */
+    for (i = 0; i < 2; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 5; i++) { /* 000 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x2); /* 11 */
+    for (i = 0; i < 4; i++) { /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x2); /* 11 */
+    for (i = 0; i < 2; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 8; i++) { /* 000000 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x2); /* 11 */
+    for (i = 0; i < 3; i++) { /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, &x2); /* 11 */
+    for (i = 0; i < 3; i++) { /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 6; i++) { /* 00000 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (i = 0; i < 8; i++) { /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    }
+    secp256k1_scalar_mul(r, t, &x6); /* 111111 */
+}
+
+static void secp256k1_scalar_inverse_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *x) { /* inverse of x; the implementation is chosen at build time by USE_SCALAR_INV_BUILTIN or USE_SCALAR_INV_NUM */
+#if defined(USE_SCALAR_INV_BUILTIN)
+    secp256k1_scalar_inverse(r, x); /* reuse the fixed addition-chain implementation */
+#elif defined(USE_SCALAR_INV_NUM)
+    unsigned char b[32]; /* serialization buffer shared by input and output */
+    secp256k1_num_t n, m; /* n: value being inverted, m: the group order */
+    secp256k1_scalar_get_b32(b, x);
+    secp256k1_num_set_bin(&n, b, 32);
+    secp256k1_scalar_order_get_num(&m);
+    secp256k1_num_mod_inverse(&n, &n, &m); /* in-place: n = n^-1 mod m */
+    secp256k1_num_get_bin(b, 32, &n);
+    secp256k1_scalar_set_b32(r, b, NULL); /* NULL: no overflow flag is collected */
+#else
+#error "Please select scalar inverse implementation"
+#endif
+}
+
+#ifdef USE_ENDOMORPHISM
+/**
+ * The Secp256k1 curve has an endomorphism, where lambda * (x, y) = (beta * x, y), where
+ * lambda is {0x53,0x63,0xad,0x4c,0xc0,0x5c,0x30,0xe0,0xa5,0x26,0x1c,0x02,0x88,0x12,0x64,0x5a,
+ *            0x12,0x2e,0x22,0xea,0x20,0x81,0x66,0x78,0xdf,0x02,0x96,0x7c,0x1b,0x23,0xbd,0x72}
+ *
+ * "Guide to Elliptic Curve Cryptography" (Hankerson, Menezes, Vanstone) gives an algorithm
+ * (algorithm 3.74) to find k1 and k2 given k, such that k1 + k2 * lambda == k mod n, and k1
+ * and k2 have a small size.
+ * It relies on constants a1, b1, a2, b2. These constants for the value of lambda above are:
+ *
+ * - a1 =      {0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15}
+ * - b1 =     -{0xe4,0x43,0x7e,0xd6,0x01,0x0e,0x88,0x28,0x6f,0x54,0x7f,0xa9,0x0a,0xbf,0xe4,0xc3}
+ * - a2 = {0x01,0x14,0xca,0x50,0xf7,0xa8,0xe2,0xf3,0xf6,0x57,0xc1,0x10,0x8d,0x9d,0x44,0xcf,0xd8}
+ * - b2 =      {0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15}
+ *
+ * The algorithm then computes c1 = round(b1 * k / n) and c2 = round(b2 * k / n), and gives
+ * k1 = k - (c1*a1 + c2*a2) and k2 = -(c1*b1 + c2*b2). Instead, we use modular arithmetic, and
+ * compute k1 as k - k2 * lambda, avoiding the need for constants a1 and a2.
+ *
+ * g1, g2 are precomputed constants used to replace division with a rounded multiplication
+ * when decomposing the scalar for an endomorphism-based point multiplication.
+ *
+ * The possibility of using precomputed estimates is mentioned in "Guide to Elliptic Curve
+ * Cryptography" (Hankerson, Menezes, Vanstone) in section 3.5.
+ *
+ * The derivation is described in the paper "Efficient Software Implementation of Public-Key
+ * Cryptography on Sensor Networks Using the MSP430X Microcontroller" (Gouvea, Oliveira, Lopez),
+ * Section 4.3 (here we use a somewhat higher-precision estimate):
+ * d = a1*b2 - b1*a2
+ * g1 = round((2^272)*b2/d)
+ * g2 = round((2^272)*b1/d)
+ *
+ * (Note that 'd' is also equal to the curve order here because [a1,b1] and [a2,b2] are found
+ * as outputs of the Extended Euclidean Algorithm on inputs 'order' and 'lambda').
+ *
+ * The function below splits a in r1 and r2, such that r1 + lambda * r2 == a (mod order).
+ */
+
+static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) { /* writes r1 and r2 such that r1 + lambda * r2 == a (mod order); neither output may alias a */
+    secp256k1_scalar_t c1, c2; /* rounded quotient estimates, later reused for the products with -b1 and -b2 */
+    static const secp256k1_scalar_t minus_lambda = SECP256K1_SCALAR_CONST( /* presumably -lambda mod order, as the name says */
+        0xAC9C52B3UL, 0x3FA3CF1FUL, 0x5AD9E3FDUL, 0x77ED9BA4UL,
+        0xA880B9FCUL, 0x8EC739C2UL, 0xE0CFC810UL, 0xB51283CFUL
+    );
+    static const secp256k1_scalar_t minus_b1 = SECP256K1_SCALAR_CONST( /* same digits as the magnitude of b1 in the comment above (b1 itself is negative) */
+        0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
+        0xE4437ED6UL, 0x010E8828UL, 0x6F547FA9UL, 0x0ABFE4C3UL
+    );
+    static const secp256k1_scalar_t minus_b2 = SECP256K1_SCALAR_CONST( /* presumably -b2 reduced mod order */
+        0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
+        0x8A280AC5UL, 0x0774346DUL, 0xD765CDA8UL, 0x3DB1562CUL
+    );
+    static const secp256k1_scalar_t g1 = SECP256K1_SCALAR_CONST( /* precomputed multiplier used with the 272-bit shift below */
+        0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00003086UL,
+        0xD221A7D4UL, 0x6BCDE86CUL, 0x90E49284UL, 0xEB153DABUL
+    );
+    static const secp256k1_scalar_t g2 = SECP256K1_SCALAR_CONST( /* precomputed multiplier used with the 272-bit shift below */
+        0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0000E443UL,
+        0x7ED6010EUL, 0x88286F54UL, 0x7FA90ABFUL, 0xE4C42212UL
+    );
+    VERIFY_CHECK(r1 != a);
+    VERIFY_CHECK(r2 != a); /* a is read again after r1 and r2 have been written */
+    secp256k1_scalar_mul_shift_var(&c1, a, &g1, 272); /* c1 = (a * g1) >> 272, rounded */
+    secp256k1_scalar_mul_shift_var(&c2, a, &g2, 272); /* c2 = (a * g2) >> 272, rounded */
+    secp256k1_scalar_mul(&c1, &c1, &minus_b1);
+    secp256k1_scalar_mul(&c2, &c2, &minus_b2);
+    secp256k1_scalar_add(r2, &c1, &c2); /* r2 = c1 * minus_b1 + c2 * minus_b2 */
+    secp256k1_scalar_mul(r1, r2, &minus_lambda);
+    secp256k1_scalar_add(r1, r1, a); /* r1 = a + r2 * minus_lambda */
+}
+#endif
+
+#endif
diff --git a/secp256k1/secp256k1.c b/secp256k1/secp256k1.c
index b9b57d4a4..d6192dc4e 100644
--- a/secp256k1/secp256k1.c
+++ b/secp256k1/secp256k1.c
@@ -1,277 +1,419 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#include "impl/num.h"
-#include "impl/field.h"
-#include "impl/group.h"
-#include "impl/ecmult.h"
-#include "impl/ecdsa.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void secp256k1_start(void) {
-    secp256k1_fe_start();
-    secp256k1_ge_start();
-    secp256k1_ecmult_start();
+/**********************************************************************
+ * Copyright (c) 2013-2015 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#define SECP256K1_BUILD (1)
+
+#include "include/secp256k1.h"
+
+#include "util.h"
+#include "num_impl.h"
+#include "field_impl.h"
+#include "scalar_impl.h"
+#include "group_impl.h"
+#include "ecmult_impl.h"
+#include "ecmult_gen_impl.h"
+#include "ecdsa_impl.h"
+#include "eckey_impl.h"
+#include "hash_impl.h"
+
+struct secp256k1_context_struct { /* state passed to every public API function in this file */
+    secp256k1_ecmult_context_t ecmult_ctx; /* built by secp256k1_context_create when SECP256K1_CONTEXT_VERIFY is set */
+    secp256k1_ecmult_gen_context_t ecmult_gen_ctx; /* built by secp256k1_context_create when SECP256K1_CONTEXT_SIGN is set */
+};
+
+secp256k1_context_t* secp256k1_context_create(int flags) { /* allocates a context and builds only the sub-contexts selected by flags */
+    secp256k1_context_t* ret = (secp256k1_context_t*)checked_malloc(sizeof(secp256k1_context_t)); /* NOTE(review): ret is used without a NULL check; presumably checked_malloc never returns NULL - confirm in util.h */
+
+    secp256k1_ecmult_context_init(&ret->ecmult_ctx); /* both sub-contexts are initialised regardless of flags */
+    secp256k1_ecmult_gen_context_init(&ret->ecmult_gen_ctx);
+
+    if (flags & SECP256K1_CONTEXT_SIGN) {
+        secp256k1_ecmult_gen_context_build(&ret->ecmult_gen_ctx);
+    }
+    if (flags & SECP256K1_CONTEXT_VERIFY) {
+        secp256k1_ecmult_context_build(&ret->ecmult_ctx);
+    }
+
+    return ret; /* the pointer is later passed to secp256k1_context_destroy, which frees it */
 }
 
-void secp256k1_stop(void) {
-    secp256k1_ecmult_stop();
-    secp256k1_ge_stop();
-    secp256k1_fe_stop();
+secp256k1_context_t* secp256k1_context_clone(const secp256k1_context_t* ctx) { /* returns a newly allocated context populated from ctx */
+    secp256k1_context_t* ret = (secp256k1_context_t*)checked_malloc(sizeof(secp256k1_context_t)); /* ctx is used below without a NULL check */
+    secp256k1_ecmult_context_clone(&ret->ecmult_ctx, &ctx->ecmult_ctx);
+    secp256k1_ecmult_gen_context_clone(&ret->ecmult_gen_ctx, &ctx->ecmult_gen_ctx);
+    return ret;
 }
 
-int secp256k1_ecdsa_verify(const unsigned char *msg, int msglen, const unsigned char *sig, int siglen, const unsigned char *pubkey, int pubkeylen) {
-    int ret = -3;
-    secp256k1_num_t m; 
-    secp256k1_num_init(&m);
-    secp256k1_ecdsa_sig_t s;
-    secp256k1_ecdsa_sig_init(&s);
+void secp256k1_context_destroy(secp256k1_context_t* ctx) { /* clears both sub-contexts and frees ctx; there is no NULL check, so ctx must be a valid context */
+    secp256k1_ecmult_context_clear(&ctx->ecmult_ctx);
+    secp256k1_ecmult_gen_context_clear(&ctx->ecmult_gen_ctx);
+
+    free(ctx); /* ctx must not be used after this call */
+}
+
+int secp256k1_ecdsa_verify(const secp256k1_context_t* ctx, const unsigned char *msg32, const unsigned char *sig, int siglen, const unsigned char *pubkey, int pubkeylen) { /* returns 1 valid signature, 0 signature does not verify, -1 public key failed to parse, -2 signature failed to parse */
     secp256k1_ge_t q;
-    secp256k1_num_set_bin(&m, msg, msglen);
+    secp256k1_ecdsa_sig_t s;
+    secp256k1_scalar_t m;
+    int ret = -3; /* placeholder: every branch below assigns ret before it is returned */
+    DEBUG_CHECK(ctx != NULL);
+    DEBUG_CHECK(secp256k1_ecmult_context_is_built(&ctx->ecmult_ctx)); /* requires a context whose ecmult_ctx has been built */
+    DEBUG_CHECK(msg32 != NULL);
+    DEBUG_CHECK(sig != NULL);
+    DEBUG_CHECK(pubkey != NULL);
+
+    secp256k1_scalar_set_b32(&m, msg32, NULL); /* NULL: no overflow flag is collected for the message */
 
-    if (!secp256k1_ecdsa_pubkey_parse(&q, pubkey, pubkeylen)) {
+    if (secp256k1_eckey_pubkey_parse(&q, pubkey, pubkeylen)) { /* the public key is parsed before the signature, so a bad key reports -1 even when the signature is also malformed */
+        if (secp256k1_ecdsa_sig_parse(&s, sig, siglen)) {
+            if (secp256k1_ecdsa_sig_verify(&ctx->ecmult_ctx, &s, &q, &m)) {
+                /* success is 1, all other values are fail */
+                ret = 1;
+            } else {
+                ret = 0;
+            }
+        } else {
+            ret = -2;
+        }
+    } else {
         ret = -1;
-        goto end;
     }
-    if (!secp256k1_ecdsa_sig_parse(&s, sig, siglen)) {
-        ret = -2;
-        goto end;
-    }
-    if (!secp256k1_ecdsa_sig_verify(&s, &q, &m)) {
-        ret = 0;
-        goto end;
-    }
-    ret = 1;
-end:
-    secp256k1_ecdsa_sig_free(&s);
-    secp256k1_num_free(&m);
+
     return ret;
 }
 
-int secp256k1_ecdsa_sign(const unsigned char *message, int messagelen, unsigned char *signature, int *signaturelen, const unsigned char *seckey, const unsigned char *nonce) {
-    secp256k1_num_t sec, non, msg;
-    secp256k1_num_init(&sec);
-    secp256k1_num_init(&non);
-    secp256k1_num_init(&msg);
-    secp256k1_num_set_bin(&sec, seckey, 32);
-    secp256k1_num_set_bin(&non, nonce, 32);
-    secp256k1_num_set_bin(&msg, message, messagelen);
+static int nonce_function_rfc6979(unsigned char *nonce32, const unsigned char *msg32, const unsigned char *key32, unsigned int counter, const void *data) { /* fills nonce32 with 32 bytes derived from key32, msg32 and optional data; always returns 1 */
+   secp256k1_rfc6979_hmac_sha256_t rng;
+   unsigned int i;
+   secp256k1_rfc6979_hmac_sha256_initialize(&rng, key32, 32, msg32, 32, (const unsigned char*)data, data != NULL ? 32 : 0); /* when data is non-NULL, exactly 32 bytes are read from it */
+   for (i = 0; i <= counter; i++) { /* runs counter + 1 times; each pass overwrites nonce32, so the last output is the one kept */
+       secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32);
+   }
+   secp256k1_rfc6979_hmac_sha256_finalize(&rng);
+   return 1;
+}
+
+const secp256k1_nonce_function_t secp256k1_nonce_function_rfc6979 = nonce_function_rfc6979; /* exported handle for the static generator above */
+const secp256k1_nonce_function_t secp256k1_nonce_function_default = nonce_function_rfc6979; /* used by the signing functions when the caller passes noncefp == NULL */
+
+int secp256k1_ecdsa_sign(const secp256k1_context_t* ctx, const unsigned char *msg32, unsigned char *signature, int *signaturelen, const unsigned char *seckey, secp256k1_nonce_function_t noncefp, const void* noncedata) { /* signs msg32 with seckey and serializes the result into signature; returns 0 and sets *signaturelen to 0 on failure */
     secp256k1_ecdsa_sig_t sig;
-    secp256k1_ecdsa_sig_init(&sig);
-    int ret = secp256k1_ecdsa_sig_sign(&sig, &sec, &msg, &non, NULL);
-    if (ret) {
-        secp256k1_ecdsa_sig_serialize(signature, signaturelen, &sig);
+    secp256k1_scalar_t sec, non, msg;
+    int ret = 0; /* stays 0 when the secret key is rejected */
+    int overflow = 0;
+    unsigned int count = 0; /* attempt counter handed to the nonce function */
+    DEBUG_CHECK(ctx != NULL);
+    DEBUG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx));
+    DEBUG_CHECK(msg32 != NULL);
+    DEBUG_CHECK(signature != NULL);
+    DEBUG_CHECK(signaturelen != NULL);
+    DEBUG_CHECK(seckey != NULL);
+    if (noncefp == NULL) {
+        noncefp = secp256k1_nonce_function_default;
+    }
+
+    secp256k1_scalar_set_b32(&sec, seckey, &overflow);
+    /* Fail if the secret key is invalid. */
+    if (!overflow && !secp256k1_scalar_is_zero(&sec)) {
+        secp256k1_scalar_set_b32(&msg, msg32, NULL);
+        while (1) { /* retry with the next counter until a nonce is usable and signing succeeds, or the nonce function gives up */
+            unsigned char nonce32[32];
+            ret = noncefp(nonce32, msg32, seckey, count, noncedata);
+            if (!ret) { /* nonce function reported failure: leave the loop with ret == 0 */
+                break;
+            }
+            secp256k1_scalar_set_b32(&non, nonce32, &overflow);
+            memset(nonce32, 0, 32); /* wipe the raw nonce bytes once they have been parsed into non */
+            if (!secp256k1_scalar_is_zero(&non) && !overflow) {
+                if (secp256k1_ecdsa_sig_sign(&ctx->ecmult_gen_ctx, &sig, &sec, &msg, &non, NULL)) { /* NULL: no recovery id requested */
+                    break;
+                }
+            }
+            count++;
+        }
+        if (ret) { /* non-zero here means the loop ended through a successful sign */
+            ret = secp256k1_ecdsa_sig_serialize(signature, signaturelen, &sig);
+        }
+        secp256k1_scalar_clear(&msg);
+        secp256k1_scalar_clear(&non);
+        secp256k1_scalar_clear(&sec);
+    }
+    if (!ret) {
+        *signaturelen = 0;
     }
-    secp256k1_ecdsa_sig_free(&sig);
-    secp256k1_num_free(&msg);
-    secp256k1_num_free(&non);
-    secp256k1_num_free(&sec);
     return ret;
 }
 
-int secp256k1_ecdsa_sign_compact(const unsigned char *message, int messagelen, unsigned char *sig64, const unsigned char *seckey, const unsigned char *nonce, int *recid) {
-    secp256k1_num_t sec, non, msg;
-    secp256k1_num_init(&sec);
-    secp256k1_num_init(&non);
-    secp256k1_num_init(&msg);
-    secp256k1_num_set_bin(&sec, seckey, 32);
-    secp256k1_num_set_bin(&non, nonce, 32);
-    secp256k1_num_set_bin(&msg, message, messagelen);
+int secp256k1_ecdsa_sign_compact(const secp256k1_context_t* ctx, const unsigned char *msg32, unsigned char *sig64, const unsigned char *seckey, secp256k1_nonce_function_t noncefp, const void* noncedata, int *recid) { /* signs msg32 and writes r then s (32 bytes each) into sig64; returns 0 and zeroes sig64 on failure */
     secp256k1_ecdsa_sig_t sig;
-    secp256k1_ecdsa_sig_init(&sig);
-    int ret = secp256k1_ecdsa_sig_sign(&sig, &sec, &msg, &non, recid);
-    if (ret) {
-        secp256k1_num_get_bin(sig64, 32, &sig.r);
-        secp256k1_num_get_bin(sig64 + 32, 32, &sig.s);
+    secp256k1_scalar_t sec, non, msg;
+    int ret = 0; /* stays 0 when the secret key is rejected */
+    int overflow = 0;
+    unsigned int count = 0; /* attempt counter handed to the nonce function */
+    DEBUG_CHECK(ctx != NULL);
+    DEBUG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx));
+    DEBUG_CHECK(msg32 != NULL);
+    DEBUG_CHECK(sig64 != NULL);
+    DEBUG_CHECK(seckey != NULL);
+    if (noncefp == NULL) {
+        noncefp = secp256k1_nonce_function_default;
+    }
+
+    secp256k1_scalar_set_b32(&sec, seckey, &overflow);
+    /* Fail if the secret key is invalid. */
+    if (!overflow && !secp256k1_scalar_is_zero(&sec)) {
+        secp256k1_scalar_set_b32(&msg, msg32, NULL);
+        while (1) { /* same retry loop as secp256k1_ecdsa_sign, but the recovery id pointer is forwarded */
+            unsigned char nonce32[32];
+            ret = noncefp(nonce32, msg32, seckey, count, noncedata);
+            if (!ret) {
+                break;
+            }
+            secp256k1_scalar_set_b32(&non, nonce32, &overflow);
+            memset(nonce32, 0, 32); /* wipe the raw nonce bytes once they have been parsed into non */
+            if (!secp256k1_scalar_is_zero(&non) && !overflow) {
+                if (secp256k1_ecdsa_sig_sign(&ctx->ecmult_gen_ctx, &sig, &sec, &msg, &non, recid)) {
+                    break;
+                }
+            }
+            count++;
+        }
+        if (ret) {
+            secp256k1_scalar_get_b32(sig64, &sig.r); /* bytes 0..31: r */
+            secp256k1_scalar_get_b32(sig64 + 32, &sig.s); /* bytes 32..63: s */
+        }
+        secp256k1_scalar_clear(&msg);
+        secp256k1_scalar_clear(&non);
+        secp256k1_scalar_clear(&sec);
+    }
+    if (!ret) {
+        memset(sig64, 0, 64);
     }
-    secp256k1_ecdsa_sig_free(&sig);
-    secp256k1_num_free(&msg);
-    secp256k1_num_free(&non);
-    secp256k1_num_free(&sec);
     return ret;
 }
 
-int secp256k1_ecdsa_recover_compact(const unsigned char *msg, int msglen, const unsigned char *sig64, unsigned char *pubkey, int *pubkeylen, int compressed, int recid) {
-    int ret = 0;
-    secp256k1_num_t m; 
-    secp256k1_num_init(&m);
+int secp256k1_ecdsa_recover_compact(const secp256k1_context_t* ctx, const unsigned char *msg32, const unsigned char *sig64, unsigned char *pubkey, int *pubkeylen, int compressed, int recid) { /* recovers the public key from a 64-byte r||s signature and recid, serializing it into pubkey; returns 0 on failure */
+    secp256k1_ge_t q;
     secp256k1_ecdsa_sig_t sig;
-    secp256k1_ecdsa_sig_init(&sig);
-    secp256k1_num_set_bin(&sig.r, sig64, 32);
-    secp256k1_num_set_bin(&sig.s, sig64 + 32, 32);
-    secp256k1_num_set_bin(&m, msg, msglen);
+    secp256k1_scalar_t m;
+    int ret = 0; /* stays 0 unless recovery and serialization both succeed */
+    int overflow = 0;
+    DEBUG_CHECK(ctx != NULL);
+    DEBUG_CHECK(secp256k1_ecmult_context_is_built(&ctx->ecmult_ctx));
+    DEBUG_CHECK(msg32 != NULL);
+    DEBUG_CHECK(sig64 != NULL);
+    DEBUG_CHECK(pubkey != NULL);
+    DEBUG_CHECK(pubkeylen != NULL);
+    DEBUG_CHECK(recid >= 0 && recid <= 3);
 
-    secp256k1_ge_t q;
-    if (secp256k1_ecdsa_sig_recover(&sig, &q, &m, recid)) {
-        secp256k1_ecdsa_pubkey_serialize(&q, pubkey, pubkeylen, compressed);
-        ret = 1;
+    secp256k1_scalar_set_b32(&sig.r, sig64, &overflow); /* bytes 0..31 hold r; an overflowing r is rejected */
+    if (!overflow) {
+        secp256k1_scalar_set_b32(&sig.s, sig64 + 32, &overflow); /* bytes 32..63 hold s; an overflowing s is rejected */
+        if (!overflow) {
+            secp256k1_scalar_set_b32(&m, msg32, NULL);
+
+            if (secp256k1_ecdsa_sig_recover(&ctx->ecmult_ctx, &sig, &q, &m, recid)) {
+                ret = secp256k1_eckey_pubkey_serialize(&q, pubkey, pubkeylen, compressed);
+            }
+        }
     }
-    secp256k1_ecdsa_sig_free(&sig);
-    secp256k1_num_free(&m);
     return ret;
 }
 
-int secp256k1_ecdsa_seckey_verify(const unsigned char *seckey) {
-    secp256k1_num_t sec;
-    secp256k1_num_init(&sec);
-    secp256k1_num_set_bin(&sec, seckey, 32);
-    int ret = !secp256k1_num_is_zero(&sec) &&
-              (secp256k1_num_cmp(&sec, &secp256k1_ge_consts->order) < 0);
-    secp256k1_num_free(&sec);
+int secp256k1_ec_seckey_verify(const secp256k1_context_t* ctx, const unsigned char *seckey) { /* returns 1 when the 32-byte seckey parses without overflow and is non-zero, else 0 */
+    secp256k1_scalar_t sec;
+    int ret;
+    int overflow;
+    DEBUG_CHECK(ctx != NULL);
+    DEBUG_CHECK(seckey != NULL);
+    (void)ctx; /* ctx is not otherwise used by this function */
+
+    secp256k1_scalar_set_b32(&sec, seckey, &overflow);
+    ret = !secp256k1_scalar_is_zero(&sec) && !overflow;
+    secp256k1_scalar_clear(&sec); /* wipe the parsed key before returning */
     return ret;
 }
 
-int secp256k1_ecdsa_pubkey_verify(const unsigned char *pubkey, int pubkeylen) {
+int secp256k1_ec_pubkey_verify(const secp256k1_context_t* ctx, const unsigned char *pubkey, int pubkeylen) { /* returns the result of parsing pubkey; the parsed point itself is discarded */
     secp256k1_ge_t q;
-    return secp256k1_ecdsa_pubkey_parse(&q, pubkey, pubkeylen);
+    DEBUG_CHECK(ctx != NULL);
+    DEBUG_CHECK(pubkey != NULL);
+    (void)ctx; /* ctx is not otherwise used by this function */
+
+    return secp256k1_eckey_pubkey_parse(&q, pubkey, pubkeylen);
 }
 
-int secp256k1_ecdsa_pubkey_create(unsigned char *pubkey, int *pubkeylen, const unsigned char *seckey, int compressed) {
-    secp256k1_num_t sec;
-    secp256k1_num_init(&sec);
-    secp256k1_num_set_bin(&sec, seckey, 32);
+int secp256k1_ec_pubkey_create(const secp256k1_context_t* ctx, unsigned char *pubkey, int *pubkeylen, const unsigned char *seckey, int compressed) { /* computes the public key for seckey and serializes it into pubkey; returns 0 and sets *pubkeylen to 0 when seckey overflows or serialization fails */
     secp256k1_gej_t pj;
-    secp256k1_ecmult_gen(&pj, &sec);
     secp256k1_ge_t p;
-    secp256k1_ge_set_gej(&p, &pj);
-    secp256k1_ecdsa_pubkey_serialize(&p, pubkey, pubkeylen, compressed);
-    return 1;
+    secp256k1_scalar_t sec;
+    int overflow;
+    int ret = 0; /* stays 0 when the key overflows */
+    DEBUG_CHECK(ctx != NULL);
+    DEBUG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx));
+    DEBUG_CHECK(pubkey != NULL);
+    DEBUG_CHECK(pubkeylen != NULL);
+    DEBUG_CHECK(seckey != NULL);
+
+    secp256k1_scalar_set_b32(&sec, seckey, &overflow);
+    if (!overflow) {
+        secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &pj, &sec); /* pj = sec * G */
+        secp256k1_ge_set_gej(&p, &pj);
+        ret = secp256k1_eckey_pubkey_serialize(&p, pubkey, pubkeylen, compressed);
+    }
+    secp256k1_scalar_clear(&sec); /* wipe the secret scalar on every path, including the overflow rejection */
+    if (!ret) {
+        *pubkeylen = 0;
+    }
+    return ret;
 }
 
-int secp256k1_ecdsa_pubkey_decompress(unsigned char *pubkey, int *pubkeylen) {
+int secp256k1_ec_pubkey_decompress(const secp256k1_context_t* ctx, unsigned char *pubkey, int *pubkeylen) { /* parses pubkey and rewrites it in place in uncompressed form, updating *pubkeylen; returns 0 if parsing or serialization fails */
     secp256k1_ge_t p;
-    if (!secp256k1_ecdsa_pubkey_parse(&p, pubkey, *pubkeylen))
-        return 0;
-    secp256k1_ecdsa_pubkey_serialize(&p, pubkey, pubkeylen, 0);
-    return 1;
+    int ret = 0;
+    DEBUG_CHECK(pubkey != NULL);
+    DEBUG_CHECK(pubkeylen != NULL);
+    (void)ctx; /* ctx is unused here and, unlike in the sibling functions, is not checked against NULL */
+
+    if (secp256k1_eckey_pubkey_parse(&p, pubkey, *pubkeylen)) {
+        ret = secp256k1_eckey_pubkey_serialize(&p, pubkey, pubkeylen, 0); /* last argument 0 selects the non-compressed encoding; NOTE(review): the buffer must be large enough for that output - confirm the required size */
+    }
+    return ret;
 }
 
-int secp256k1_ecdsa_privkey_tweak_add(unsigned char *seckey, const unsigned char *tweak) {
-    int ret = 1;
-    secp256k1_num_t term;
-    secp256k1_num_init(&term);
-    secp256k1_num_set_bin(&term, tweak, 32);
-    if (secp256k1_num_cmp(&term, &secp256k1_ge_consts->order) >= 0)
-        ret = 0;
-    secp256k1_num_t sec;
-    secp256k1_num_init(&sec);
+int secp256k1_ec_privkey_tweak_add(const secp256k1_context_t* ctx, unsigned char *seckey, const unsigned char *tweak) { /* applies the additive tweak to seckey in place; returns 0 and leaves seckey untouched when the tweak overflows or the helper rejects the result */
+    secp256k1_scalar_t term;
+    secp256k1_scalar_t sec;
+    int ret = 0;
+    int overflow = 0;
+    DEBUG_CHECK(ctx != NULL);
+    DEBUG_CHECK(seckey != NULL);
+    DEBUG_CHECK(tweak != NULL);
+    (void)ctx; /* ctx is not otherwise used by this function */
+
+    secp256k1_scalar_set_b32(&term, tweak, &overflow); /* only the tweak is checked for overflow */
+    secp256k1_scalar_set_b32(&sec, seckey, NULL);
+
+    ret = secp256k1_eckey_privkey_tweak_add(&sec, &term) && !overflow; /* the helper runs even when overflow is set; its result is then discarded */
     if (ret) {
-        secp256k1_num_set_bin(&sec, seckey, 32);
-        secp256k1_num_add(&sec, &sec, &term);
-        secp256k1_num_mod(&sec, &secp256k1_ge_consts->order);
-        if (secp256k1_num_is_zero(&sec))
-            ret = 0;
+        secp256k1_scalar_get_b32(seckey, &sec);
     }
-    if (ret)
-        secp256k1_num_get_bin(seckey, 32, &sec);
-    secp256k1_num_free(&sec);
-    secp256k1_num_free(&term);
+
+    secp256k1_scalar_clear(&sec);
+    secp256k1_scalar_clear(&term);
     return ret;
 }
 
-int secp256k1_ecdsa_pubkey_tweak_add(unsigned char *pubkey, int pubkeylen, const unsigned char *tweak) {
-    int ret = 1;
-    secp256k1_num_t term;
-    secp256k1_num_init(&term);
-    secp256k1_num_set_bin(&term, tweak, 32);
-    if (secp256k1_num_cmp(&term, &secp256k1_ge_consts->order) >= 0)
-        ret = 0;
+int secp256k1_ec_pubkey_tweak_add(const secp256k1_context_t* ctx, unsigned char *pubkey, int pubkeylen, const unsigned char *tweak) { /* applies the additive tweak to the serialized public key in place; returns 0 on overflow, parse failure, tweak failure or serialization failure */
     secp256k1_ge_t p;
-    if (ret) {
-        if (!secp256k1_ecdsa_pubkey_parse(&p, pubkey, pubkeylen))
-            ret = 0;
-    }
-    if (ret) {
-        secp256k1_gej_t pt;
-        secp256k1_ecmult_gen(&pt, &term);
-        secp256k1_gej_add_ge(&pt, &pt, &p);
-        if (secp256k1_gej_is_infinity(&pt))
-            ret = 0;
-        secp256k1_ge_set_gej(&p, &pt);
-        int oldlen = pubkeylen;
-        secp256k1_ecdsa_pubkey_serialize(&p, pubkey, &pubkeylen, oldlen <= 33);
-        assert(pubkeylen == oldlen);
+    secp256k1_scalar_t term;
+    int ret = 0;
+    int overflow = 0;
+    DEBUG_CHECK(ctx != NULL);
+    DEBUG_CHECK(secp256k1_ecmult_context_is_built(&ctx->ecmult_ctx));
+    DEBUG_CHECK(pubkey != NULL);
+    DEBUG_CHECK(tweak != NULL);
+
+    secp256k1_scalar_set_b32(&term, tweak, &overflow);
+    if (!overflow) {
+        ret = secp256k1_eckey_pubkey_parse(&p, pubkey, pubkeylen);
+        if (ret) {
+            ret = secp256k1_eckey_pubkey_tweak_add(&ctx->ecmult_ctx, &p, &term);
+        }
+        if (ret) {
+            int oldlen = pubkeylen;
+            ret = secp256k1_eckey_pubkey_serialize(&p, pubkey, &pubkeylen, oldlen <= 33); /* an input of at most 33 bytes is written back compressed, anything longer uncompressed */
+            VERIFY_CHECK(pubkeylen == oldlen); /* the output is expected to occupy exactly the input length */
+        }
     }
-    secp256k1_num_free(&term);
+
     return ret;
 }
 
-int secp256k1_ecdsa_privkey_tweak_mul(unsigned char *seckey, const unsigned char *tweak) {
-    int ret = 1;
-    secp256k1_num_t factor;
-    secp256k1_num_init(&factor);
-    secp256k1_num_set_bin(&factor, tweak, 32);
-    if (secp256k1_num_is_zero(&factor))
-        ret = 0;
-    if (secp256k1_num_cmp(&factor, &secp256k1_ge_consts->order) >= 0)
-        ret = 0;
-    secp256k1_num_t sec;
-    secp256k1_num_init(&sec);
+int secp256k1_ec_privkey_tweak_mul(const secp256k1_context_t* ctx, unsigned char *seckey, const unsigned char *tweak) { /* applies the multiplicative tweak to seckey in place; returns 0 and leaves seckey untouched when the tweak overflows or the helper rejects it */
+    secp256k1_scalar_t factor;
+    secp256k1_scalar_t sec;
+    int ret = 0;
+    int overflow = 0;
+    DEBUG_CHECK(ctx != NULL);
+    DEBUG_CHECK(seckey != NULL);
+    DEBUG_CHECK(tweak != NULL);
+    (void)ctx; /* ctx is not otherwise used by this function */
+
+    secp256k1_scalar_set_b32(&factor, tweak, &overflow); /* only the tweak is checked for overflow */
+    secp256k1_scalar_set_b32(&sec, seckey, NULL);
+    ret = secp256k1_eckey_privkey_tweak_mul(&sec, &factor) && !overflow; /* the helper runs even when overflow is set; its result is then discarded */
     if (ret) {
-        secp256k1_num_set_bin(&sec, seckey, 32);
-        secp256k1_num_mod_mul(&sec, &sec, &factor, &secp256k1_ge_consts->order);
+        secp256k1_scalar_get_b32(seckey, &sec);
     }
-    if (ret)
-        secp256k1_num_get_bin(seckey, 32, &sec);
-    secp256k1_num_free(&sec);
-    secp256k1_num_free(&factor);
+
+    secp256k1_scalar_clear(&sec);
+    secp256k1_scalar_clear(&factor);
     return ret;
 }
 
-int secp256k1_ecdsa_pubkey_tweak_mul(unsigned char *pubkey, int pubkeylen, const unsigned char *tweak) {
-    int ret = 1;
-    secp256k1_num_t factor;
-    secp256k1_num_init(&factor);
-    secp256k1_num_set_bin(&factor, tweak, 32);
-    if (secp256k1_num_is_zero(&factor))
-        ret = 0;
-    if (secp256k1_num_cmp(&factor, &secp256k1_ge_consts->order) >= 0)
-        ret = 0;
+int secp256k1_ec_pubkey_tweak_mul(const secp256k1_context_t* ctx, unsigned char *pubkey, int pubkeylen, const unsigned char *tweak) { /* applies the multiplicative tweak to the serialized public key in place; returns 0 on overflow, parse failure, tweak failure or serialization failure */
     secp256k1_ge_t p;
-    if (ret) {
-        if (!secp256k1_ecdsa_pubkey_parse(&p, pubkey, pubkeylen))
-            ret = 0;
-    }
-    if (ret) {
-        secp256k1_num_t zero;
-        secp256k1_num_init(&zero);
-        secp256k1_num_set_int(&zero, 0);
-        secp256k1_gej_t pt;
-        secp256k1_gej_set_ge(&pt, &p);
-        secp256k1_ecmult(&pt, &pt, &factor, &zero);
-        secp256k1_num_free(&zero);
-        secp256k1_ge_set_gej(&p, &pt);
-        int oldlen = pubkeylen;
-        secp256k1_ecdsa_pubkey_serialize(&p, pubkey, &pubkeylen, oldlen <= 33);
-        assert(pubkeylen == oldlen);
+    secp256k1_scalar_t factor;
+    int ret = 0;
+    int overflow = 0;
+    DEBUG_CHECK(ctx != NULL);
+    DEBUG_CHECK(secp256k1_ecmult_context_is_built(&ctx->ecmult_ctx));
+    DEBUG_CHECK(pubkey != NULL);
+    DEBUG_CHECK(tweak != NULL);
+
+    secp256k1_scalar_set_b32(&factor, tweak, &overflow);
+    if (!overflow) {
+        ret = secp256k1_eckey_pubkey_parse(&p, pubkey, pubkeylen);
+        if (ret) {
+            ret = secp256k1_eckey_pubkey_tweak_mul(&ctx->ecmult_ctx, &p, &factor);
+        }
+        if (ret) {
+            int oldlen = pubkeylen;
+            ret = secp256k1_eckey_pubkey_serialize(&p, pubkey, &pubkeylen, oldlen <= 33); /* an input of at most 33 bytes is written back compressed, anything longer uncompressed */
+            VERIFY_CHECK(pubkeylen == oldlen); /* the output is expected to occupy exactly the input length */
+        }
     }
-    secp256k1_num_free(&factor);
+
     return ret;
 }
 
-int secp256k1_ecdsa_privkey_export(const unsigned char *seckey, unsigned char *privkey, int *privkeylen, int compressed) {
-    secp256k1_num_t key;
-    secp256k1_num_init(&key);
-    secp256k1_num_set_bin(&key, seckey, 32);
-    int ret = secp256k1_ecdsa_privkey_serialize(privkey, privkeylen, &key, compressed);
-    secp256k1_num_free(&key);
+int secp256k1_ec_privkey_export(const secp256k1_context_t* ctx, const unsigned char *seckey, unsigned char *privkey, int *privkeylen, int compressed) { /* serializes the 32-byte seckey into privkey through secp256k1_eckey_privkey_serialize and returns that call's result */
+    secp256k1_scalar_t key;
+    int ret = 0;
+    DEBUG_CHECK(seckey != NULL);
+    DEBUG_CHECK(privkey != NULL);
+    DEBUG_CHECK(privkeylen != NULL);
+    DEBUG_CHECK(ctx != NULL);
+    DEBUG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx)); /* the serializer is handed the ecmult_gen context */
+
+    secp256k1_scalar_set_b32(&key, seckey, NULL); /* NULL: no overflow flag is collected here */
+    ret = secp256k1_eckey_privkey_serialize(&ctx->ecmult_gen_ctx, privkey, privkeylen, &key, compressed);
+    secp256k1_scalar_clear(&key); /* wipe the parsed key before returning */
     return ret;
 }
 
-int secp256k1_ecdsa_privkey_import(unsigned char *seckey, const unsigned char *privkey, int privkeylen) {
-    secp256k1_num_t key;
-    secp256k1_num_init(&key);
-    int ret = secp256k1_ecdsa_privkey_parse(&key, privkey, privkeylen);
-    if (ret)
-        secp256k1_num_get_bin(seckey, 32, &key);
-    secp256k1_num_free(&key);
+int secp256k1_ec_privkey_import(const secp256k1_context_t* ctx, unsigned char *seckey, const unsigned char *privkey, int privkeylen) { /* parses privkey and, on success, writes the 32-byte secret into seckey; returns the parser's result */
+    secp256k1_scalar_t key;
+    int ret = 0;
+    DEBUG_CHECK(seckey != NULL);
+    DEBUG_CHECK(privkey != NULL);
+    (void)ctx; /* ctx is unused here and is not checked against NULL */
+
+    ret = secp256k1_eckey_privkey_parse(&key, privkey, privkeylen);
+    if (ret) { /* seckey is left untouched when parsing fails */
+        secp256k1_scalar_get_b32(seckey, &key);
+    }
+    secp256k1_scalar_clear(&key);
     return ret;
 }
 
-#ifdef __cplusplus
+int secp256k1_context_randomize(secp256k1_context_t* ctx, const unsigned char *seed32) { /* forwards seed32 to secp256k1_ecmult_gen_blind on the signing context; always returns 1 */
+    DEBUG_CHECK(ctx != NULL);
+    DEBUG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx));
+    secp256k1_ecmult_gen_blind(&ctx->ecmult_gen_ctx, seed32); /* seed32 is passed through without a NULL check */
+    return 1;
 }
-#endif
diff --git a/secp256k1/secp256k1.h b/secp256k1/secp256k1.h
deleted file mode 100644
index fd6d6b1f4..000000000
--- a/secp256k1/secp256k1.h
+++ /dev/null
@@ -1,121 +0,0 @@
-#ifndef _SECP256K1_
-#define _SECP256K1_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/** Initialize the library. This may take some time (10-100 ms).
- *  You need to call this before calling any other function.
- *  It cannot run in parallel with any other functions, but once
- *  secp256k1_start() returns, all other functions are thread-safe.
- */
-void secp256k1_start(void);
-
-/** Free all memory associated with this library. After this, no
- *  functions can be called anymore, except secp256k1_start()
- */
-void secp256k1_stop(void);
-
-/** Verify an ECDSA signature.
- *  Returns: 1: correct signature
- *           0: incorrect signature
- *          -1: invalid public key
- *          -2: invalid signature
- */
-int secp256k1_ecdsa_verify(const unsigned char *msg, int msglen,
-                           const unsigned char *sig, int siglen,
-                           const unsigned char *pubkey, int pubkeylen);
-
-/** Create an ECDSA signature.
- *  Returns: 1: signature created
- *           0: nonce invalid, try another one
- *  In:      msg:    the message being signed
- *           msglen: the length of the message being signed
- *           seckey: pointer to a 32-byte secret key (assumed to be valid)
- *           nonce:  pointer to a 32-byte nonce (generated with a cryptographic PRNG)
- *  Out:     sig:    pointer to a 72-byte array where the signature will be placed.
- *           siglen: pointer to an int, which will be updated to the signature length (<=72).
- */
-int secp256k1_ecdsa_sign(const unsigned char *msg, int msglen,
-                         unsigned char *sig, int *siglen,
-                         const unsigned char *seckey,
-                         const unsigned char *nonce);
-
-/** Create a compact ECDSA signature (64 byte + recovery id).
- *  Returns: 1: signature created
- *           0: nonce invalid, try another one
- *  In:      msg:    the message being signed
- *           msglen: the length of the message being signed
- *           seckey: pointer to a 32-byte secret key (assumed to be valid)
- *           nonce:  pointer to a 32-byte nonce (generated with a cryptographic PRNG)
- *  Out:     sig:    pointer to a 64-byte array where the signature will be placed.
- *           recid:  pointer to an int, which will be updated to contain the recovery id.
- */
-int secp256k1_ecdsa_sign_compact(const unsigned char *msg, int msglen,
-                                 unsigned char *sig64,
-                                 const unsigned char *seckey,
-                                 const unsigned char *nonce,
-                                 int *recid);
-
-/** Recover an ECDSA public key from a compact signature.
- *  Returns: 1: public key succesfully recovered (which guarantees a correct signature).
- *           0: otherwise.
- *  In:      msg:        the message assumed to be signed
- *           msglen:     the length of the message
- *           sig64:      signature as 64 byte array
- *           compressed: whether to recover a compressed or uncompressed pubkey
- *           recid:      the recovery id (as returned by ecdsa_sign_compact)
- *  Out:     pubkey:     pointer to a 33 or 65 byte array to put the pubkey.
- *           pubkeylen:  pointer to an int that will contain the pubkey length.
- */
-
-int secp256k1_ecdsa_recover_compact(const unsigned char *msg, int msglen,
-                                    const unsigned char *sig64,
-                                    unsigned char *pubkey, int *pubkeylen,
-                                    int compressed, int recid);
-
-/** Verify an ECDSA secret key.
- *  Returns: 1: secret key is valid
- *           0: secret key is invalid
- *  In:      seckey: pointer to a 32-byte secret key
- */
-int secp256k1_ecdsa_seckey_verify(const unsigned char *seckey);
-
-/** Just validate a public key.
- *  Returns: 1: valid public key
- *           0: invalid public key
- */
-int secp256k1_ecdsa_pubkey_verify(const unsigned char *pubkey, int pubkeylen);
-
-/** Compute the public key for a secret key.
- *  In:     compressed: whether the computed public key should be compressed
- *          seckey:     pointer to a 32-byte private key.
- *  Out:    pubkey:     pointer to a 33-byte (if compressed) or 65-byte (if uncompressed)
- *                      area to store the public key.
- *          pubkeylen:  pointer to int that will be updated to contains the pubkey's
- *                      length.
- *  Returns: 1: secret was valid, public key stores
- *           0: secret was invalid, try again.
- */
-int secp256k1_ecdsa_pubkey_create(unsigned char *pubkey, int *pubkeylen, const unsigned char *seckey, int compressed);
-
-int secp256k1_ecdsa_pubkey_decompress(unsigned char *pubkey, int *pubkeylen);
-
-int secp256k1_ecdsa_privkey_export(const unsigned char *seckey,
-                                   unsigned char *privkey, int *privkeylen,
-                                   int compressed);
-
-int secp256k1_ecdsa_privkey_import(unsigned char *seckey,
-                                   const unsigned char *privkey, int privkeylen);
-
-int secp256k1_ecdsa_privkey_tweak_add(unsigned char *seckey, const unsigned char *tweak);
-int secp256k1_ecdsa_pubkey_tweak_add(unsigned char *pubkey, int pubkeylen, const unsigned char *tweak);
-int secp256k1_ecdsa_privkey_tweak_mul(unsigned char *seckey, const unsigned char *tweak);
-int secp256k1_ecdsa_pubkey_tweak_mul(unsigned char *pubkey, int pubkeylen, const unsigned char *tweak);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/secp256k1/util.h b/secp256k1/util.h
index 357c7e06b..ae98639f7 100644
--- a/secp256k1/util.h
+++ b/secp256k1/util.h
@@ -1,19 +1,104 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
 
 #ifndef _SECP256K1_UTIL_H_
 #define _SECP256K1_UTIL_H_
 
-/** Generate a pseudorandom 32-bit number. */
-static uint32_t secp256k1_rand32(void);
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifdef DETERMINISTIC /* DETERMINISTIC: print only the message, without __FILE__/__LINE__ */
+#define TEST_FAILURE(msg) do { \
+    fprintf(stderr, "%s\n", msg); \
+    abort(); \
+} while(0)
+#else /* default: prefix the message with the source file and line */
+#define TEST_FAILURE(msg) do { \
+    fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, msg); \
+    abort(); \
+} while(0)
+#endif /* neither variant ends in ';' - the caller's semicolon completes the statement, so the macro is safe in if/else */
+
+#ifdef HAVE_BUILTIN_EXPECT /* EXPECT(x,c): evaluates to x while hinting that x is expected to equal c */
+#define EXPECT(x,c) __builtin_expect((x),(c))
+#else /* builtin not available: plain x, the hint c is dropped */
+#define EXPECT(x,c) (x)
+#endif
+
+#ifdef DETERMINISTIC /* DETERMINISTIC: fixed failure text, the condition is not stringified */
+#define CHECK(cond) do { \
+    if (EXPECT(!(cond), 0)) { /* failure is hinted as the unlikely branch */ \
+        TEST_FAILURE("test condition failed"); \
+    } \
+} while(0)
+#else /* default: append the stringified condition (#cond) to the failure text */
+#define CHECK(cond) do { \
+    if (EXPECT(!(cond), 0)) { /* failure is hinted as the unlikely branch */ \
+        TEST_FAILURE("test condition failed: " #cond); \
+    } \
+} while(0)
+#endif /* CHECK is active in every build; a false condition ends in abort() via TEST_FAILURE */
 
-/** Generate a pseudorandom 32-byte array. */
-static void secp256k1_rand256(unsigned char *b32);
+/* Like assert(), but safe to use on expressions with side effects: the condition is evaluated in both configurations. */
+#ifndef NDEBUG /* NDEBUG not defined: a false condition aborts via CHECK */
+#define DEBUG_CHECK CHECK
+#else /* NDEBUG defined: still evaluate cond, but discard the result */
+#define DEBUG_CHECK(cond) do { (void)(cond); } while(0)
+#endif
+
+/* Like DEBUG_CHECK(), but enabled by defining VERIFY rather than by leaving NDEBUG undefined. */
+#ifdef VERIFY /* VERIFY defined: a false condition aborts via CHECK */
+#define VERIFY_CHECK CHECK
+#else /* VERIFY not defined: still evaluate cond, but discard the result */
+#define VERIFY_CHECK(cond) do { (void)(cond); } while(0)
+#endif
+
+static SECP256K1_INLINE void *checked_malloc(size_t size) { /* malloc() wrapper: aborts via CHECK instead of returning NULL to the caller */
+    void *ret = malloc(size);
+    CHECK(ret != NULL); /* NOTE(review): malloc(0) may legally return NULL, which would also abort here */
+    return ret;
+}
 
-/** Generate a pseudorandom 32-byte array with long sequences of zero and one bits. */
-static void secp256k1_rand256_test(unsigned char *b32);
+/* Macro for restrict, when available and not in a VERIFY build. */
+#if defined(SECP256K1_BUILD) && defined(VERIFY) /* SECP256K1_BUILD together with VERIFY: expands to nothing */
+# define SECP256K1_RESTRICT
+#else
+# if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) ) /* no C99 reported: fall back to compiler-specific spellings */
+#  if SECP256K1_GNUC_PREREQ(3,0) /* NOTE(review): SECP256K1_GNUC_PREREQ is not defined in this header - it must come from an earlier include */
+#   define SECP256K1_RESTRICT __restrict__
+#  elif (defined(_MSC_VER) && _MSC_VER >= 1400)
+#   define SECP256K1_RESTRICT __restrict
+#  else /* neither compiler check matched: no qualifier */
+#   define SECP256K1_RESTRICT
+#  endif
+# else /* C99 or later: use the standard keyword */
+#  define SECP256K1_RESTRICT restrict
+# endif
+#endif
+
+#if defined(_WIN32) /* printf conversion text for 64-bit integers (used after '%'): "I64" spelling on _WIN32 */
+# define I64FORMAT "I64d"
+# define I64uFORMAT "I64u"
+#else /* elsewhere: "ll" spelling */
+# define I64FORMAT "lld"
+# define I64uFORMAT "llu"
+#endif
 
-#include "impl/util.h"
+#if defined(HAVE___INT128) /* uint128_t is provided only when HAVE___INT128 is defined */
+# if defined(__GNUC__) /* prefix the typedef with __extension__ on GNU-compatible compilers; presumably to silence pedantic warnings about __int128 */
+#  define SECP256K1_GNUC_EXT __extension__
+# else /* other compilers: no prefix */
+#  define SECP256K1_GNUC_EXT
+# endif
+SECP256K1_GNUC_EXT typedef unsigned __int128 uint128_t;
+#endif
 
 #endif
diff --git a/test/libdevcrypto/crypto.cpp b/test/libdevcrypto/crypto.cpp
index b70eae36e..2db6e5faa 100644
--- a/test/libdevcrypto/crypto.cpp
+++ b/test/libdevcrypto/crypto.cpp
@@ -23,7 +23,7 @@
 
 #include <random>
 #if ETH_HAVE_SECP256K1
-#include <secp256k1/secp256k1.h>
+#include <secp256k1/include/secp256k1.h>
 #endif
 #include <libdevcore/Common.h>
 #include <libdevcore/RLP.h>
@@ -115,7 +115,7 @@ BOOST_AUTO_TEST_CASE(common_encrypt_decrypt)
 BOOST_AUTO_TEST_CASE(cryptopp_cryptopp_secp256k1libport)
 {
 #if ETH_HAVE_SECP256K1
-	secp256k1_start();
+	secp256k1_context_t* ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY);
 #endif
 	
 	// base secret
@@ -166,7 +166,10 @@ BOOST_AUTO_TEST_CASE(cryptopp_cryptopp_secp256k1libport)
 		size_t cssz = DSAConvertSignatureFormat(dersig, 72, DSA_DER, sig.data(), 64, DSA_P1363);
 		BOOST_CHECK(cssz <= 72);
 #if ETH_HAVE_SECP256K1
-		BOOST_REQUIRE(1 == secp256k1_ecdsa_verify(he.data(), sizeof(he), dersig, cssz, encpub, 65));
+		BOOST_REQUIRE(1 == secp256k1_ecdsa_verify(ctx, he.data(), dersig, cssz, encpub, 65));
 #endif
 	}
+#if ETH_HAVE_SECP256K1
+	secp256k1_context_destroy(ctx); // release the context created at the start of this test case
+#endif
 }