Browse Source

server: Add the GolombEncoding

master
Alexis Hernandez 6 years ago
parent
commit
f6f3288880
  1. 22
      server/app/com/xsn/explorer/gcs/Bit.scala
  2. 18
      server/app/com/xsn/explorer/gcs/GolombCodedSet.scala
  3. 150
      server/app/com/xsn/explorer/gcs/GolombEncoding.scala
  4. 34
      server/app/com/xsn/explorer/gcs/UnsignedByte.scala
  5. 93
      server/test/com/xsn/explorer/gcs/GolombEncodingSpec.scala

22
server/app/com/xsn/explorer/gcs/Bit.scala

@ -0,0 +1,22 @@
package com.xsn.explorer.gcs
/**
 * A single binary digit, either [[Bit.Zero]] or [[Bit.One]].
 */
sealed trait Bit extends Product with Serializable {

  /** The numeric value of this bit: 0 for [[Bit.Zero]], 1 for [[Bit.One]]. */
  def toInt: Int = this match {
    case Bit.One => 1
    case Bit.Zero => 0
  }

  /** Renders the bit as the string "0" or "1". */
  override def toString: String = String.valueOf(toInt)
}

object Bit {

  final case object Zero extends Bit
  final case object One extends Bit

  /**
   * Parses a binary digit character.
   *
   * @param char the character to parse
   * @return the matching bit for '0' or '1', None for any other character
   */
  def from(char: Char): Option[Bit] = Option(char).collect {
    case '0' => Zero
    case '1' => One
  }
}

18
server/app/com/xsn/explorer/gcs/GolombCodedSet.scala

@ -0,0 +1,18 @@
package com.xsn.explorer.gcs
import com.xsn.explorer.models.values.HexString
/**
 * The result of a Golomb encoding: the parameters used and the encoded bytes.
 *
 * @param p the p parameter the data was encoded with
 * @param m the m parameter the data was encoded with
 * @param n the number of items encoded in the data
 * @param data the encoded bytes
 */
class GolombCodedSet(
    val p: Int,
    val m: Int,
    val n: Int,
    val data: List[UnsignedByte]) {

  /**
   * The encoded bytes rendered as a lowercase hex string, two digits per byte.
   *
   * Throws a RuntimeException if the rendered string is rejected by HexString.from.
   */
  def hex: HexString = {
    val rendered = data
      .iterator
      .map(unsigned => "%02x".format(unsigned.byte))
      .mkString

    HexString
      .from(rendered)
      .getOrElse(throw new RuntimeException("Unexpected error, unable to create hex value"))
  }
}

150
server/app/com/xsn/explorer/gcs/GolombEncoding.scala

@ -0,0 +1,150 @@
package com.xsn.explorer.gcs
import com.google.common.hash.Hashing
/**
 * A Golomb-coded set, matches all items in the set with probability 1, and matches other items with probability 1/M.
 *
 * The encoding is also parameterized by P, the bit length of the remainder code.
 *
 * see https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki
 *
 * @param p the bit length of the remainder code, must be in the range [2, 30]
 * @param m the M parameter, the word hashes are mapped to the range [0, m * n) where n is the number of words
 * @param key the key used by the SipHash function that hashes the words
 */
class GolombEncoding(p: Int, m: Int, key: SipHashKey) {

  require(p > 1 && p < 31, s"p must be in the range [2, 30], got $p")

  private val hasher = Hashing.sipHash24(key.k0, key.k1)

  /**
   * Encodes the given word list.
   *
   * An empty word list produces an empty set (n = 0, no data).
   *
   * @param words the words to encode
   * @return the encoded set, including the parameters used to produce it
   */
  def encode(words: List[String]): GolombCodedSet = {
    val hashList = hashes(words)
    val diffList = differences(hashList)
    val encodedBits = diffList.flatMap(golombEncode)

    // the last group may have less than 8 bits, it is right-padded with zeros to complete the byte
    val encodedBytes = encodedBits
      .grouped(8)
      .map { bits => UnsignedByte.parse(bits.padTo(8, Bit.Zero)) }
      .toList

    new GolombCodedSet(
      p = p,
      m = m,
      n = words.size,
      data = encodedBytes)
  }

  /**
   * Recovers the hashes from the encoded bytes.
   *
   * This method doesn't handle corrupted inputs, which shouldn't be a problem because
   * the method is used only to verify that the filter is correct.
   *
   * @param encoded the encoded bytes, we expect them to be correct
   * @param n the number of words encoded in the bytes
   * @return the recovered list of hashes
   */
  private[gcs] def decode(encoded: List[UnsignedByte], n: Int): List[BigInt] = {
    val encodedBits = encoded.flatMap(_.bits)

    // each step consumes one encoded delta and adds it to the previous hash
    val (_, _, result) = List.fill(n)(0)
      .foldLeft((encodedBits, BigInt(0), List.empty[BigInt])) { case ((bits, acc, hashes), _) =>
        val (remaining, delta) = golombDecode(bits)
        val hash = acc + delta
        (remaining, hash, hash :: hashes)
      }

    result.reverse
  }

  /**
   * Maps the word list to a sorted list of hashes, each one reduced to the range [0, m * n).
   */
  private[gcs] def hashes(words: List[String]): List[BigInt] = {
    val modulus = BigInt(m) * words.length

    words
      .map { word => fastReduction(hash(word), modulus) }
      .sorted
  }

  /**
   * Encodes a single value: the quotient (x >> p) in unary (q ones followed by a zero),
   * then the remainder (the lowest p bits) as a fixed-width binary number.
   */
  private def golombEncode(x: BigInt): List[Bit] = {
    val q = (x >> p).toInt
    val r = (x & ((BigInt(1) << p) - 1)).toInt

    val qBits = List.fill[Bit](q)(Bit.One) :+ Bit.Zero
    val rBits = toBits(r, p)
    qBits ++ rBits
  }

  /**
   * Decodes a single value from the start of the given bits.
   *
   * @return the bits that were not consumed, and the decoded value
   */
  private def golombDecode(bits: List[Bit]): (List[Bit], BigInt) = {
    val q = bits.takeWhile(_ == Bit.One).size
    // skip the unary quotient and its zero terminator
    val rBits = bits.drop(q + 1).take(p)
    val r = toBigInt(rBits)

    // BigInt arithmetic, so a long run of ones can't overflow
    val x = (BigInt(q) << p) + r
    val pending = bits.drop(q + 1 + p)
    (pending, x)
  }

  /**
   * Computes the difference between every hash and its predecessor, the first hash is compared against 0.
   *
   * An empty list produces an empty list.
   */
  private def differences(sortedHashList: List[BigInt]): List[BigInt] = {
    sortedHashList
      .zip(BigInt(0) :: sortedHashList)
      .map { case (current, previous) => current - previous }
  }

  /**
   * Hashes the string with the keyed SipHash function, interpreting the 64-bit result as an unsigned number.
   */
  private def hash(string: String): BigInt = {
    // explicit charset, the platform default would produce different filters on different machines
    val x = hasher.hashBytes(string.getBytes(java.nio.charset.StandardCharsets.UTF_8))
    BigInt(java.lang.Long.toUnsignedString(x.asLong()))
  }

  /**
   * Interprets the bits as a binary number, the first bit being the most significant one.
   */
  private def toBigInt(bits: List[Bit]): BigInt = {
    bits.foldLeft(BigInt(0)) { case (acc, cur) =>
      (acc * 2) + cur.toInt
    }
  }

  /**
   * Returns the lowest `size` bits of x, the most significant bit first.
   *
   * The callers pass a non-negative x that fits in `size` bits.
   */
  private def toBits(x: Long, size: Int): List[Bit] = {
    (size - 1 to 0 by -1)
      .map { index => if (((x >> index) & 1L) == 1L) Bit.One else Bit.Zero }
      .toList
  }

  /**
   * Maps a 64-bit value v to the range [0, modulus) by taking the high 64 bits of the product v * modulus,
   * which avoids an expensive and biased modulo operation.
   *
   * BigInt has arbitrary precision, so there is no need to split the operands in 32-bit halves
   * as the fixed-width implementations do, the result is the same for non-negative arguments.
   */
  private def fastReduction(v: BigInt, modulus: BigInt): BigInt = {
    (v * modulus) >> 64
  }
}
object GolombEncoding {

  /** The default bit length of the remainder code. */
  val DefaultP: Int = 19

  /** The default M parameter. */
  val DefaultM: Int = 784931

  /**
   * Creates an encoding that uses [[DefaultP]] and [[DefaultM]] with the given key.
   */
  def default(key: SipHashKey): GolombEncoding = new GolombEncoding(DefaultP, DefaultM, key)
}

34
server/app/com/xsn/explorer/gcs/UnsignedByte.scala

@ -0,0 +1,34 @@
package com.xsn.explorer.gcs
/**
 * Wraps a byte to interpret it as an unsigned value (0 to 255).
 *
 * @param byte the underlying signed byte
 */
class UnsignedByte(val byte: Byte) extends AnyVal {

  /** The unsigned value of the byte, between 0 and 255. */
  def toInt: Int = byte & 0xFF

  /** The unsigned value rendered as a decimal string. */
  override def toString: String = String.valueOf(toInt)

  /** The unsigned value rendered as exactly 8 binary digits, left-padded with zeros. */
  def toFixedBinaryString: String = {
    val digits = toInt.toBinaryString
    ("0" * (8 - digits.length)) + digits
  }

  /** The 8 bits of the byte, the most significant bit first. */
  def bits: List[Bit] = {
    toFixedBinaryString
      .toList
      .flatMap { digit => Bit.from(digit).toList }
  }
}
object UnsignedByte {

  /**
   * Builds a byte from up to 8 bits, the first bit being the most significant one.
   *
   * When fewer than 8 bits are given, they are taken as the lowest bits of the byte.
   *
   * @param bits the bits to parse, at most 8
   * @return the byte represented by the bits
   */
  def parse(bits: List[Bit]): UnsignedByte = {
    require(bits.size <= 8)

    // shifting left and setting the lowest bit is the same as doubling and adding the bit
    val value = bits.foldLeft(0) { (acc, bit) => (acc << 1) | bit.toInt }
    new UnsignedByte(value.toByte)
  }
}

93
server/test/com/xsn/explorer/gcs/GolombEncodingSpec.scala

@ -0,0 +1,93 @@
package com.xsn.explorer.gcs
import org.scalatest.{MustMatchers, WordSpec}
/**
 * Verifies the Golomb encoding against itself (encode, then decode) and against
 * the filter produced by the btcutil gcs package for the same words and key.
 */
class GolombEncodingSpec extends WordSpec with MustMatchers {

  val words = List(
    "Alex", "Bob", "Charlie", "Dick", "Ed", "Frank",
    "George", "Harry", "Ilya", "John", "Kevin", "Larry",
    "Michael", "Nate", "Owen", "Paul", "Quentin"
  )

  "the encoding" should {
    // the same key that is used in the go code below
    val keyBytes = List(
      0x4c, 0xb1, 0xab, 0x12, 0x57, 0x62, 0x1e, 0x41,
      0x3b, 0x8b, 0x0e, 0x26, 0x64, 0x8d, 0x4a, 0x15).map(_.toByte)

    val golomb = GolombEncoding.default(SipHashKey.fromBtcutil(keyBytes))
    val encoded = golomb.encode(words)

    "decode the same hashes" in {
      val expectedHashes = golomb.hashes(words)
      val decodedHashes = golomb.decode(encoded.data, words.size)

      decodedHashes mustEqual expectedHashes
    }

    "return the encoded hex from the btcutil gcs" in {
      /**
       * The hex was generated from this go code:
       {{{
         package main
         import (
           "encoding/hex"
           "fmt"
           "github.com/btcsuite/btcutil/gcs/builder"
         )
         func main() {
           contents := [][]byte{
             []byte("Alex"),
             []byte("Bob"),
             []byte("Charlie"),
             []byte("Dick"),
             []byte("Ed"),
             []byte("Frank"),
             []byte("George"),
             []byte("Harry"),
             []byte("Ilya"),
             []byte("John"),
             []byte("Kevin"),
             []byte("Larry"),
             []byte("Michael"),
             []byte("Nate"),
             []byte("Owen"),
             []byte("Paul"),
             []byte("Quentin"),
           }
           testKey := [16]byte{0x4c, 0xb1, 0xab, 0x12, 0x57, 0x62, 0x1e, 0x41,
             0x3b, 0x8b, 0x0e, 0x26, 0x64, 0x8d, 0x4a, 0x15}
           b := builder.WithRandomKey().SetKey(testKey);
           f, err := b.AddEntries(contents).Build();
           if err != nil {
             fmt.Println("Error", err)
           }
           rawBytes, _ := f.Bytes()
           encoded := hex.EncodeToString(rawBytes);
           fmt.Println("Filter: %X\n", len(encoded), encoded)
         }
       }}}
       */
      val expected = "056ff79e6c2994ba5d91402f327f807097c5c571f8d212511a8237f005331346102b41967f35ef488406c38a88"

      encoded.hex.string must be(expected)
    }
  }
}
Loading…
Cancel
Save