5 changed files with 317 additions and 0 deletions
@ -0,0 +1,22 @@ |
package com.xsn.explorer.gcs |
/**
 * A single binary digit, either [[Bit.Zero]] or [[Bit.One]].
 */
sealed trait Bit extends Product with Serializable {

  /** The numeric value of the bit: 0 for [[Bit.Zero]], 1 for [[Bit.One]]. */
  def toInt: Int = {
    if (this == Bit.One) 1 else 0
  }

  /** Renders the bit as the digit "0" or "1". */
  override def toString: String = String.valueOf(toInt)
}

object Bit {

  case object Zero extends Bit
  case object One extends Bit

  // The only characters that represent a bit.
  private val fromChar: PartialFunction[Char, Bit] = {
    case '0' => Zero
    case '1' => One
  }

  /**
   * Parses a binary digit.
   *
   * @param char the character to parse
   * @return the matching bit, or None when the character is neither '0' nor '1'
   */
  def from(char: Char): Option[Bit] = fromChar.lift(char)
}
@ -0,0 +1,18 @@ |
package com.xsn.explorer.gcs |
import com.xsn.explorer.models.values.HexString |
/**
 * An encoded set: the encoded bytes together with the parameters stored alongside them.
 *
 * @param p the `p` parameter of the encoding
 * @param m the `m` parameter of the encoding
 * @param n the number of encoded items
 * @param data the encoded bytes
 */
class GolombCodedSet(
    val p: Int,
    val m: Int,
    val n: Int,
    val data: List[UnsignedByte]) {

  /**
   * Renders the encoded bytes as a hex string, two lowercase digits per byte.
   */
  def hex: HexString = {
    val digits = data
      .map { unsigned => "%02x".format(unsigned.byte) }
      .mkString

    HexString
      .from(digits)
      .getOrElse(throw new RuntimeException("Unexpected error, unable to create hex value"))
  }
}
@ -0,0 +1,150 @@ |
package com.xsn.explorer.gcs |
import com.google.common.hash.Hashing |
/**
 * A Golomb-coded set, matches all items in the set with probability 1, and matches other items with probability 1/M.
 *
 * The encoding is also parameterized by P, the bit length of the remainder code.
 *
 * see https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki
 *
 * @param p the bit length of the remainder code, must be in the range [2, 30]
 * @param m the M parameter, the hashes are mapped to the range [0, m * number of words)
 * @param key the key for the SipHash function used to hash the words
 */
class GolombEncoding(p: Int, m: Int, key: SipHashKey) {

  require(p > 1 && p < 31, s"p must be in the range [2, 30], got $p")

  private val hasher = Hashing.sipHash24(key.k0, key.k1)

  /**
   * Encodes the given word list.
   *
   * The words are hashed, the sorted hashes are turned into the differences between
   * consecutive values, and each difference is Golomb-encoded. The resulting bit stream
   * is packed into bytes, padding the last byte with zeros.
   *
   * @param words the words to encode, an empty list produces an empty set
   * @return the encoded set
   */
  def encode(words: List[String]): GolombCodedSet = {
    val hashList = hashes(words)
    val diffList = differences(hashList)
    val encodedBits = diffList.flatMap(golombEncode)
    val encodedBytes = encodedBits
      .grouped(8)
      .map { bits => UnsignedByte.parse(bits.padTo(8, Bit.Zero)) }
      .toList

    new GolombCodedSet(
      p = p,
      m = m,
      n = words.size,
      data = encodedBytes)
  }

  /**
   * Recovers the hashes from the encoded bytes.
   *
   * This method doesn't handle corrupted inputs, which shouldn't be a problem because
   * the method is used only to verify that the filter is correct.
   *
   * @param encoded the encoded bytes, we expect them to be correct
   * @param n the number of words encoded in the bytes
   * @return the recovered list of hashes
   */
  private[gcs] def decode(encoded: List[UnsignedByte], n: Int): List[BigInt] = {
    val encodedBits = encoded.flatMap(_.bits)
    val (_, _, result) = (0 until n)
      .foldLeft((encodedBits, BigInt(0), List.empty[BigInt])) { case ((bits, previous, recovered), _) =>
        // each encoded value is the distance to the previous hash
        val (remaining, delta) = golombDecode(bits)
        val current = previous + delta
        (remaining, current, current :: recovered)
      }

    // the hashes were accumulated by prepending
    result.reverse
  }

  /**
   * Maps the word list to a sorted list of hashes, each one reduced to the range [0, m * words.length).
   */
  private[gcs] def hashes(words: List[String]): List[BigInt] = {
    val modulus = BigInt(m) * words.length
    words
      .map { word => fastReduction(hash(word), modulus) }
      .sorted
  }

  /**
   * Encodes a value as its quotient by 2^p in unary (that many ones followed by a zero),
   * followed by its remainder written in exactly p bits.
   */
  private def golombEncode(x: BigInt): List[Bit] = {
    val q = (x >> p).toInt
    val r = (x & ((1 << p) - 1)).toInt

    val qBits = List.fill[Bit](q)(Bit.One) :+ Bit.Zero
    val rBits = toBits(r, p)
    qBits ++ rBits
  }

  /**
   * Decodes a single value from the head of the bit list.
   *
   * @return the bits left after the decoded value, and the decoded value
   */
  private def golombDecode(bits: List[Bit]): (List[Bit], BigInt) = {
    val q = bits.takeWhile(_ == Bit.One).size
    // skip the unary quotient and the zero that terminates it
    val rBits = bits.drop(q + 1).take(p)
    val r = toBigInt(rBits)
    val x = (BigInt(q) << p) + r
    val pending = bits.drop(q + 1 + p)
    (pending, x)
  }

  /**
   * Computes the distance between each hash and the previous one, the first hash is compared to 0.
   */
  private def differences(sortedHashList: List[BigInt]): List[BigInt] = {
    // Pairing each hash with its predecessor handles the empty list, sliding(2) would
    // produce a single one-element window for it, which has no second value to subtract.
    (BigInt(0) :: sortedHashList)
      .zip(sortedHashList)
      .map { case (previous, current) => current - previous }
  }

  /**
   * Hashes the UTF-8 bytes of the string, interpreting the 64-bit result as an unsigned number.
   */
  private def hash(string: String): BigInt = {
    // An explicit charset keeps the result independent from the JVM default encoding.
    val x = hasher.hashBytes(string.getBytes(java.nio.charset.StandardCharsets.UTF_8))
    BigInt(java.lang.Long.toUnsignedString(x.asLong()))
  }

  /**
   * Interprets the bits as a number, the first bit being the most significant one.
   */
  private def toBigInt(bits: List[Bit]): BigInt = {
    bits.foldLeft(BigInt(0)) { case (acc, cur) =>
      (acc * 2) + cur.toInt
    }
  }

  /**
   * Writes the number in binary, padding it with leading zeros up to the given size.
   */
  private def toBits(x: Long, size: Int): List[Bit] = {
    val bits = x
      .toBinaryString
      .flatMap(Bit.from)
      .toList

    List.fill(size - bits.size)(Bit.Zero) ++ bits
  }

  /**
   * Reduces v to the range [0, modulus) without a division, the result is the same
   * as (v * modulus) >> 64, computed from the 32-bit halves of both values.
   */
  private def fastReduction(v: BigInt, modulus: BigInt): BigInt = {
    val nHi = modulus >> 32
    val nLo = modulus & 0xFFFFFFFFL

    // First, we'll split the item we need to reduce into its higher and lower bits.
    val vhi = v >> 32
    val vlo = v & 0xFFFFFFFFL

    // Then, we distribute multiplication over each part.
    val vnphi = vhi * nHi
    val vnpmid = vhi * nLo
    val npvmid = nHi * vlo
    val vnplo = vlo * nLo

    // We calculate the carry bit.
    val carry = ((vnpmid & 0xFFFFFFFFL) + (npvmid & 0xFFFFFFFFL) + (vnplo >> 32)) >> 32

    // Last, we add the high bits, the middle bits, and the carry.
    vnphi + (vnpmid >> 32) + (npvmid >> 32) + carry
  }
}
object GolombEncoding {

  /** The `p` value used by [[default]]. */
  val DefaultP: Int = 19

  /** The `m` value used by [[default]]. */
  val DefaultM: Int = 784931

  /**
   * Creates an encoding that uses [[DefaultP]] and [[DefaultM]] with the given key.
   */
  def default(key: SipHashKey): GolombEncoding = new GolombEncoding(DefaultP, DefaultM, key)
}
@ -0,0 +1,34 @@ |
package com.xsn.explorer.gcs |
/**
 * Wraps a byte to read it as an unsigned value, from 0 to 255.
 */
class UnsignedByte(val byte: Byte) extends AnyVal {

  /** The unsigned value in decimal. */
  override def toString: String = String.valueOf(toInt)

  /** The unsigned value in binary, left-padded with zeros to 8 digits. */
  def toFixedBinaryString: String = {
    val digits = toInt.toBinaryString
    val padding = "0" * (8 - digits.length)
    padding + digits
  }

  /** The unsigned value, the mask drops the sign extension of negative bytes. */
  def toInt: Int = byte.toInt & 0xFF

  /** The 8 bits of the byte, starting from the most significant one. */
  def bits: List[Bit] = {
    toFixedBinaryString.toList.flatMap { digit => Bit.from(digit) }
  }
}
object UnsignedByte {

  /**
   * Builds a byte from at most 8 bits, the first bit being the most significant one.
   *
   * @param bits the bits to pack, fewer than 8 bits are taken as the lowest bits of the byte
   * @return the byte holding the given bits
   */
  def parse(bits: List[Bit]): UnsignedByte = {
    require(bits.size <= 8)

    val value = bits.foldLeft(0) { (total, bit) => (total << 1) + bit.toInt }
    new UnsignedByte(value.toByte)
  }
}
@ -0,0 +1,93 @@ |
package com.xsn.explorer.gcs |
import org.scalatest.{MustMatchers, WordSpec} |
/**
 * Verifies the Golomb encoding against its own decoder and against a filter produced by btcutil.
 */
class GolombEncodingSpec extends WordSpec with MustMatchers {

  val words = List(
    "Alex",
    "Bob",
    "Charlie",
    "Dick",
    "Ed",
    "Frank",
    "George",
    "Harry",
    "Ilya",
    "John",
    "Kevin",
    "Larry",
    "Michael",
    "Nate",
    "Owen",
    "Paul",
    "Quentin"
  )

  "the encoding" should {
    // the same key used by the go program that produced the expected hex
    val rawKey = List(
      0x4c, 0xb1, 0xab, 0x12, 0x57, 0x62, 0x1e, 0x41,
      0x3b, 0x8b, 0x0e, 0x26, 0x64, 0x8d, 0x4a, 0x15).map(_.toByte)

    val sipHashKey = SipHashKey.fromBtcutil(rawKey)
    val encoding = GolombEncoding.default(sipHashKey)
    val codedSet = encoding.encode(words)

    "decode the same hashes" in {
      val expectedHashes = encoding.hashes(words)
      val recovered = encoding.decode(codedSet.data, words.size)

      recovered mustEqual expectedHashes
    }

    "return the encoded hex from the btcutil gcs" in {
      /**
       * The hex was generated from this go code:
       {{{
         package main
         import (
           "encoding/hex"
           "fmt"
           "github.com/btcsuite/btcutil/gcs/builder"
         )
         func main() {
           contents := [][]byte{
             []byte("Alex"),
             []byte("Bob"),
             []byte("Charlie"),
             []byte("Dick"),
             []byte("Ed"),
             []byte("Frank"),
             []byte("George"),
             []byte("Harry"),
             []byte("Ilya"),
             []byte("John"),
             []byte("Kevin"),
             []byte("Larry"),
             []byte("Michael"),
             []byte("Nate"),
             []byte("Owen"),
             []byte("Paul"),
             []byte("Quentin"),
           }
           testKey := [16]byte{0x4c, 0xb1, 0xab, 0x12, 0x57, 0x62, 0x1e, 0x41,
             0x3b, 0x8b, 0x0e, 0x26, 0x64, 0x8d, 0x4a, 0x15}
           b := builder.WithRandomKey().SetKey(testKey);
           f, err := b.AddEntries(contents).Build();
           if err != nil {
             fmt.Println("Error", err)
           }
           rawBytes, _ := f.Bytes()
           encoded := hex.EncodeToString(rawBytes);
           fmt.Println("Filter: %X\n", len(encoded), encoded)
         }
       }}}
       */
      val expected = "056ff79e6c2994ba5d91402f327f807097c5c571f8d212511a8237f005331346102b41967f35ef488406c38a88"

      codedSet.hex.string must be(expected)
    }
  }
}
Reference in new issue