5 changed files with 317 additions and 0 deletions
@ -0,0 +1,22 @@ |
|||
package com.xsn.explorer.gcs |
|||
|
|||
/**
 * A single binary digit: either [[Bit.Zero]] or [[Bit.One]].
 */
sealed trait Bit extends Product with Serializable {

  /** The numeric value of this bit: 0 for [[Bit.Zero]], 1 for [[Bit.One]]. */
  def toInt: Int

  /** Renders the bit as the digit "0" or "1". */
  override def toString: String = toInt.toString
}

object Bit {

  final case object Zero extends Bit {
    override def toInt: Int = 0
  }

  final case object One extends Bit {
    override def toInt: Int = 1
  }

  /**
   * Interprets a character as a binary digit.
   *
   * @param char the character to interpret
   * @return the matching bit for '0' or '1', None for any other character
   */
  def from(char: Char): Option[Bit] = {
    if (char == '0') Some(Zero)
    else if (char == '1') Some(One)
    else None
  }
}
@ -0,0 +1,18 @@ |
|||
package com.xsn.explorer.gcs |
|||
|
|||
import com.xsn.explorer.models.values.HexString |
|||
|
|||
/**
 * Holds the bytes of a Golomb-coded set together with the parameters stored alongside them.
 *
 * @param p the P parameter carried with the set
 * @param m the M parameter carried with the set
 * @param n the N parameter carried with the set
 * @param data the encoded bytes
 */
class GolombCodedSet(
    val p: Int,
    val m: Int,
    val n: Int,
    val data: List[UnsignedByte]) {

  /**
   * Renders the encoded bytes as a lowercase hex string, two digits per byte.
   *
   * @return the hex representation of `data`
   * @throws RuntimeException if the rendered string is rejected by `HexString.from`
   */
  def hex: HexString = {
    val digits = new StringBuilder
    data.foreach { unsigned => digits.append("%02x".format(unsigned.byte)) }

    HexString
      .from(digits.toString)
      .getOrElse(throw new RuntimeException("Unexpected error, unable to create hex value"))
  }
}
@ -0,0 +1,150 @@ |
|||
package com.xsn.explorer.gcs |
|||
|
|||
import com.google.common.hash.Hashing |
|||
|
|||
/**
 * A Golomb-coded set, matches all items in the set with probability 1, and matches other items with probability 1/M.
 *
 * The encoding is also parameterized by P, the bit length of the remainder code.
 *
 * see https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki
 *
 * @param p the bit length of the remainder code, must be in the range [2, 30]
 * @param m the M parameter, the hashes are mapped to the range [0, m * number of words)
 * @param key the key used by the SipHash-2-4 function to hash the words
 */
class GolombEncoding(p: Int, m: Int, key: SipHashKey) {
  require(p > 1 && p < 31, s"p must be in the range [2, 30], got $p")

  private val hasher = Hashing.sipHash24(key.k0, key.k1)

  /**
   * Encodes the given word list.
   *
   * The words are hashed, the hashes are sorted, and the differences between consecutive
   * hashes are Golomb-Rice coded; the resulting bit stream is packed into bytes, the last
   * byte being padded with zero bits.
   *
   * An empty word list produces a set without data.
   *
   * @param words the items to encode
   * @return the encoded set, where n is the number of given words
   */
  def encode(words: List[String]): GolombCodedSet = {
    val hashList = hashes(words)
    val diffList = differences(hashList)
    val encodedBits = diffList.flatMap(golombEncode)
    val encodedBytes = encodedBits
      .grouped(8)
      .map { bits => UnsignedByte.parse(bits.padTo(8, Bit.Zero)) }
      .toList

    new GolombCodedSet(
      p = p,
      m = m,
      n = words.size,
      data = encodedBytes)
  }

  /**
   * Recovers the hashes from the encoded bytes.
   *
   * This method doesn't handle corrupted inputs, which shouldn't be a problem because
   * the method is used only to verify that the filter is correct.
   *
   * @param encoded the encoded bytes, we expect them to be correct
   * @param n the number of words encoded in the bytes
   * @return the recovered list of hashes
   */
  private[gcs] def decode(encoded: List[UnsignedByte], n: Int): List[BigInt] = {
    val encodedBits = encoded.flatMap(_.bits)

    // each step consumes one Golomb code and adds the decoded delta to the previous hash
    val (_, _, result) = (1 to n)
      .foldLeft((encodedBits, BigInt(0), List.empty[BigInt])) { case ((bits, acc, hashes), _) =>
        val (remaining, delta) = golombDecode(bits)
        val hash = acc + delta
        (remaining, hash, hash :: hashes)
      }

    // the hashes were prepended, restore the original order
    result.reverse
  }

  /**
   * Maps the word list to a list of hashes.
   *
   * Each word hash is reduced using the modulus `m * words.length`, the result is sorted.
   */
  private[gcs] def hashes(words: List[String]): List[BigInt] = {
    val modulus = BigInt(m) * words.length
    val f = fastReduction(_: BigInt, modulus)
    words
      .map(hash)
      .map(f)
      .sorted
  }

  /**
   * Golomb-Rice codes a single value: the quotient `x >> p` is written in unary (that many
   * ones followed by a zero), then the remainder is written using `p` bits.
   */
  private def golombEncode(x: BigInt): List[Bit] = {
    val q = (x >> p).toInt
    val r = (x & ((1 << p) - 1)).toInt

    val qBits = List.fill[Bit](q)(Bit.One) :+ Bit.Zero
    val rBits = toBits(r, p)

    qBits ++ rBits
  }

  /**
   * Reads a single Golomb-Rice code from the head of the given bits.
   *
   * @return the bits that follow the code, and the decoded value
   */
  private def golombDecode(bits: List[Bit]): (List[Bit], BigInt) = {
    // unary quotient: count the leading ones, the zero that ends them is skipped below
    val q = bits.takeWhile(_ == Bit.One).size
    val rBits = bits.drop(q + 1).take(p)
    val r = toBigInt(rBits)

    val x = (q * (1L << p)) + r
    val pending = bits.drop(q + 1 + p)

    (pending, x)
  }

  /**
   * Computes the difference between each hash and the previous one, the first hash
   * is compared against 0.
   *
   * Pairing the list with itself shifted by one element (instead of matching on sliding
   * windows of size two) keeps the method total, an empty list produces an empty result.
   */
  private def differences(sortedHashList: List[BigInt]): List[BigInt] = {
    sortedHashList
      .zip(BigInt(0) :: sortedHashList)
      .map { case (current, previous) => current - previous }
  }

  /**
   * Hashes the word with the keyed SipHash function, interpreting the 64-bit result as unsigned.
   */
  private def hash(string: String): BigInt = {
    // an explicit charset keeps the hashes independent from the platform default encoding
    val bytes = string.getBytes(java.nio.charset.StandardCharsets.UTF_8)
    val x = hasher.hashBytes(bytes)
    BigInt(java.lang.Long.toUnsignedString(x.asLong()))
  }

  /**
   * Interprets the bits as an unsigned number, most significant bit first.
   */
  private def toBigInt(bits: List[Bit]): BigInt = {
    bits.foldLeft(BigInt(0)) { case (acc, cur) =>
      (acc * 2) + cur.toInt
    }
  }

  /**
   * Converts the number to its binary digits, left-padded with zeros up to the given size.
   */
  private def toBits(x: Long, size: Int): List[Bit] = {
    val bits = x
      .toBinaryString
      .flatMap(Bit.from)
      .toList

    List.fill(size - bits.size)(Bit.Zero) ++ bits
  }

  /**
   * Reduces the value by multiplying it by the modulus and dropping the lowest 64 bits of the
   * product; the multiplication is done over the 32-bit halves of both operands.
   */
  private def fastReduction(v: BigInt, modulus: BigInt): BigInt = {
    val nHi = modulus >> 32
    val nLo = modulus & 0xFFFFFFFFL

    // First, we'll split the item we need to reduce into its higher and lower bits.
    val vhi = v >> 32
    val vlo = v & 0xFFFFFFFFL

    // Then, we distribute multiplication over each part.
    val vnphi = vhi * nHi
    val vnpmid = vhi * nLo
    val npvmid = nHi * vlo
    val vnplo = vlo * nLo

    // We calculate the carry bit.
    val carry = ((vnpmid & 0xFFFFFFFFL) + (npvmid & 0xFFFFFFFFL) + (vnplo >> 32)) >> 32

    // Last, we add the high bits, the middle bits, and the carry.
    val result = vnphi + (vnpmid >> 32) + (npvmid >> 32) + carry

    result
  }
}
|||
|
|||
object GolombEncoding {

  /** The P value used by [[default]]. */
  val DefaultP: Int = 19

  /** The M value used by [[default]]. */
  val DefaultM: Int = 784931

  /**
   * Creates an encoding that uses [[DefaultP]] and [[DefaultM]].
   *
   * @param key the key for the hash function
   */
  def default(key: SipHashKey): GolombEncoding = new GolombEncoding(DefaultP, DefaultM, key)
}
@ -0,0 +1,34 @@ |
|||
package com.xsn.explorer.gcs |
|||
|
|||
/**
 * A byte that is interpreted as an unsigned value (0 to 255).
 *
 * @param byte the underlying signed byte
 */
class UnsignedByte(val byte: Byte) extends AnyVal {

  /** The unsigned value of the byte, the mask discards the sign extension. */
  def toInt: Int = byte.toInt & 0xFF

  /** The unsigned value in decimal. */
  override def toString: String = toInt.toString

  /** The unsigned value as exactly 8 binary digits, left-padded with zeros. */
  def toFixedBinaryString: String = {
    val digits = toInt.toBinaryString
    ("0" * (8 - digits.length)) + digits
  }

  /** The 8 bits of the byte, in the same order as [[toFixedBinaryString]]. */
  def bits: List[Bit] = toFixedBinaryString.toList.flatMap(digit => Bit.from(digit).toList)
}
|||
|
|||
object UnsignedByte {

  /**
   * Builds a byte from its bits, the first bit being the most significant one.
   *
   * When fewer than 8 bits are given, they fill the lowest positions of the byte.
   *
   * @param bits the bits to pack, at most 8
   * @return the byte holding the given bits
   * @throws IllegalArgumentException if more than 8 bits are given
   */
  def parse(bits: List[Bit]): UnsignedByte = {
    require(bits.size <= 8)

    // shifting in one bit at a time, the narrowing keeps the lowest 8 bits
    val value = bits.map(_.toInt).foldLeft(0) { (acc, bit) => (acc << 1) | bit }
    new UnsignedByte(value.toByte)
  }
}
@ -0,0 +1,93 @@ |
|||
package com.xsn.explorer.gcs |
|||
|
|||
import org.scalatest.{MustMatchers, WordSpec} |
|||
|
|||
class GolombEncodingSpec extends WordSpec with MustMatchers {

  // The items to encode, the same list appears in the go program quoted in the last test.
  val words = List(
    "Alex", "Bob", "Charlie", "Dick", "Ed", "Frank",
    "George", "Harry", "Ilya", "John", "Kevin", "Larry",
    "Michael", "Nate", "Owen", "Paul", "Quentin"
  )

  "the encoding" should {
    // The same 16 key bytes are used by the go program quoted in the last test.
    val keyBytes = List(
      0x4c, 0xb1, 0xab, 0x12, 0x57, 0x62, 0x1e, 0x41,
      0x3b, 0x8b, 0x0e, 0x26, 0x64, 0x8d, 0x4a, 0x15).map(_.toByte)

    val golomb = GolombEncoding.default(SipHashKey.fromBtcutil(keyBytes))
    val encoded = golomb.encode(words)

    "decode the same hashes" in {
      // decoding the encoded bytes must give back the hashes that were encoded
      val expected = golomb.hashes(words)
      val decoded = golomb.decode(encoded.data, words.size)

      decoded mustEqual expected
    }

    "return the encoded hex from the btcutil gcs" in {
      /*
       * The hex was generated from this go code:
       * {{{
       * package main
       *
       * import (
       *   "encoding/hex"
       *   "fmt"
       *   "github.com/btcsuite/btcutil/gcs/builder"
       * )
       *
       * func main() {
       *   contents := [][]byte{
       *     []byte("Alex"),
       *     []byte("Bob"),
       *     []byte("Charlie"),
       *     []byte("Dick"),
       *     []byte("Ed"),
       *     []byte("Frank"),
       *     []byte("George"),
       *     []byte("Harry"),
       *     []byte("Ilya"),
       *     []byte("John"),
       *     []byte("Kevin"),
       *     []byte("Larry"),
       *     []byte("Michael"),
       *     []byte("Nate"),
       *     []byte("Owen"),
       *     []byte("Paul"),
       *     []byte("Quentin"),
       *   }
       *   testKey := [16]byte{0x4c, 0xb1, 0xab, 0x12, 0x57, 0x62, 0x1e, 0x41,
       *     0x3b, 0x8b, 0x0e, 0x26, 0x64, 0x8d, 0x4a, 0x15}
       *
       *   b := builder.WithRandomKey().SetKey(testKey);
       *   f, err := b.AddEntries(contents).Build();
       *   if err != nil {
       *     fmt.Println("Error", err)
       *   }
       *   rawBytes, _ := f.Bytes()
       *   encoded := hex.EncodeToString(rawBytes);
       *   fmt.Println("Filter: %X\n", len(encoded), encoded)
       * }
       * }}}
       */
      val expected = "056ff79e6c2994ba5d91402f327f807097c5c571f8d212511a8237f005331346102b41967f35ef488406c38a88"

      encoded.hex.string must be(expected)
    }
  }
}
Loading…
Reference in new issue