105 lines
4.8 KiB
105 lines
4.8 KiB
// Copyright (c) 2013 Pieter Wuille
|
|
// Distributed under the MIT/X11 software license, see the accompanying
|
|
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|
|
|
#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_
|
|
#define _SECP256K1_FIELD_INNER5X52_IMPL_H_
|
|
|
|
#include <stdint.h>
|
|
|
|
void static inline secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r) {
|
|
__int128 c = (__int128)a[0] * b[0];
|
|
uint64_t t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0FFFFFFFFFFFFFE0
|
|
c = c + (__int128)a[0] * b[1] +
|
|
(__int128)a[1] * b[0];
|
|
uint64_t t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 20000000000000BF
|
|
c = c + (__int128)a[0] * b[2] +
|
|
(__int128)a[1] * b[1] +
|
|
(__int128)a[2] * b[0];
|
|
uint64_t t2 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 30000000000001A0
|
|
c = c + (__int128)a[0] * b[3] +
|
|
(__int128)a[1] * b[2] +
|
|
(__int128)a[2] * b[1] +
|
|
(__int128)a[3] * b[0];
|
|
uint64_t t3 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 4000000000000280
|
|
c = c + (__int128)a[0] * b[4] +
|
|
(__int128)a[1] * b[3] +
|
|
(__int128)a[2] * b[2] +
|
|
(__int128)a[3] * b[1] +
|
|
(__int128)a[4] * b[0];
|
|
uint64_t t4 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 320000000000037E
|
|
c = c + (__int128)a[1] * b[4] +
|
|
(__int128)a[2] * b[3] +
|
|
(__int128)a[3] * b[2] +
|
|
(__int128)a[4] * b[1];
|
|
uint64_t t5 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 22000000000002BE
|
|
c = c + (__int128)a[2] * b[4] +
|
|
(__int128)a[3] * b[3] +
|
|
(__int128)a[4] * b[2];
|
|
uint64_t t6 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 12000000000001DE
|
|
c = c + (__int128)a[3] * b[4] +
|
|
(__int128)a[4] * b[3];
|
|
uint64_t t7 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 02000000000000FE
|
|
c = c + (__int128)a[4] * b[4];
|
|
uint64_t t8 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 001000000000001E
|
|
uint64_t t9 = c;
|
|
|
|
c = t0 + (__int128)t5 * 0x1000003D10ULL;
|
|
t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
|
|
c = c + t1 + (__int128)t6 * 0x1000003D10ULL;
|
|
t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
|
|
c = c + t2 + (__int128)t7 * 0x1000003D10ULL;
|
|
r[2] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
|
|
c = c + t3 + (__int128)t8 * 0x1000003D10ULL;
|
|
r[3] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
|
|
c = c + t4 + (__int128)t9 * 0x1000003D10ULL;
|
|
r[4] = c & 0x0FFFFFFFFFFFFULL; c = c >> 48; // c max 000001000003D110
|
|
c = t0 + (__int128)c * 0x1000003D1ULL;
|
|
r[0] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 1000008
|
|
r[1] = t1 + c;
|
|
|
|
}
|
|
|
|
void static inline secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r) {
|
|
__int128 c = (__int128)a[0] * a[0];
|
|
uint64_t t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0FFFFFFFFFFFFFE0
|
|
c = c + (__int128)(a[0]*2) * a[1];
|
|
uint64_t t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 20000000000000BF
|
|
c = c + (__int128)(a[0]*2) * a[2] +
|
|
(__int128)a[1] * a[1];
|
|
uint64_t t2 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 30000000000001A0
|
|
c = c + (__int128)(a[0]*2) * a[3] +
|
|
(__int128)(a[1]*2) * a[2];
|
|
uint64_t t3 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 4000000000000280
|
|
c = c + (__int128)(a[0]*2) * a[4] +
|
|
(__int128)(a[1]*2) * a[3] +
|
|
(__int128)a[2] * a[2];
|
|
uint64_t t4 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 320000000000037E
|
|
c = c + (__int128)(a[1]*2) * a[4] +
|
|
(__int128)(a[2]*2) * a[3];
|
|
uint64_t t5 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 22000000000002BE
|
|
c = c + (__int128)(a[2]*2) * a[4] +
|
|
(__int128)a[3] * a[3];
|
|
uint64_t t6 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 12000000000001DE
|
|
c = c + (__int128)(a[3]*2) * a[4];
|
|
uint64_t t7 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 02000000000000FE
|
|
c = c + (__int128)a[4] * a[4];
|
|
uint64_t t8 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 001000000000001E
|
|
uint64_t t9 = c;
|
|
c = t0 + (__int128)t5 * 0x1000003D10ULL;
|
|
t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
|
|
c = c + t1 + (__int128)t6 * 0x1000003D10ULL;
|
|
t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
|
|
c = c + t2 + (__int128)t7 * 0x1000003D10ULL;
|
|
r[2] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
|
|
c = c + t3 + (__int128)t8 * 0x1000003D10ULL;
|
|
r[3] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
|
|
c = c + t4 + (__int128)t9 * 0x1000003D10ULL;
|
|
r[4] = c & 0x0FFFFFFFFFFFFULL; c = c >> 48; // c max 000001000003D110
|
|
c = t0 + (__int128)c * 0x1000003D1ULL;
|
|
r[0] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 1000008
|
|
r[1] = t1 + c;
|
|
|
|
}
|
|
|
|
#endif
|
|
|