5 changed files with 317 additions and 0 deletions
@ -0,0 +1,22 @@ |
|||||
|
package com.xsn.explorer.gcs |
||||
|
|
||||
|
/**
 * A single binary digit, either [[Bit.Zero]] or [[Bit.One]].
 */
sealed trait Bit extends Product with Serializable {

  /** The numeric value of this bit: 1 for [[Bit.One]], 0 for [[Bit.Zero]]. */
  def toInt: Int = this match {
    case Bit.One => 1
    case Bit.Zero => 0
  }

  /** Renders the bit as its numeric value, "0" or "1". */
  override def toString: String = String.valueOf(toInt)
}

object Bit {

  final case object Zero extends Bit
  final case object One extends Bit

  /**
   * Parses a binary digit.
   *
   * @param char the character to parse
   * @return the matching bit for '0' or '1', None for any other character
   */
  def from(char: Char): Option[Bit] = PartialFunction.condOpt[Char, Bit](char) {
    case '0' => Zero
    case '1' => One
  }
}
@ -0,0 +1,18 @@ |
|||||
|
package com.xsn.explorer.gcs |
||||
|
|
||||
|
import com.xsn.explorer.models.values.HexString |
||||
|
|
||||
|
/**
 * The result of encoding a list of words with a Golomb encoding.
 *
 * @param p the p parameter the encoding was created with
 * @param m the m parameter the encoding was created with
 * @param n the number of encoded words
 * @param data the encoded bytes
 */
class GolombCodedSet(
    val p: Int,
    val m: Int,
    val n: Int,
    val data: List[UnsignedByte]) {

  /**
   * The encoded bytes rendered as a lowercase hex string, two digits per byte.
   *
   * @throws RuntimeException if the rendered string is rejected by HexString.from
   */
  def hex: HexString = {
    val rendered = data
      .map { unsigned => "%02x".format(unsigned.byte) }
      .mkString

    HexString
      .from(rendered)
      .getOrElse(throw new RuntimeException("Unexpected error, unable to create hex value"))
  }
}
@ -0,0 +1,150 @@ |
|||||
|
package com.xsn.explorer.gcs |
||||
|
|
||||
|
import com.google.common.hash.Hashing |
||||
|
|
||||
|
/**
 * A Golomb-coded set, matches all items in the set with probability 1, and matches other items with probability 1/M.
 *
 * The encoding is also parameterized by P, the bit length of the remainder code.
 *
 * see https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki
 *
 * @param p the bit length of the remainder code, it must be between 2 and 30 (inclusive)
 * @param m the M parameter, every hash is mapped to the range [0, m * number of words)
 * @param key the key for the SipHash-2-4 function used to hash the words
 */
class GolombEncoding(p: Int, m: Int, key: SipHashKey) {
  require(p > 1 && p < 31)

  private val hasher = Hashing.sipHash24(key.k0, key.k1)

  /**
   * Encodes the given word list.
   *
   * The words are hashed and sorted, then the difference between consecutive hashes is
   * Golomb-coded, and the resulting bits are packed into bytes (the last byte is padded
   * with zero bits).
   *
   * @param words the words to encode, an empty list produces a set without data
   * @return the encoded set, where n is the number of given words
   */
  def encode(words: List[String]): GolombCodedSet = {
    val hashList = hashes(words)
    val diffList = differences(hashList)
    val encodedBits = diffList.flatMap(golombEncode)
    val encodedBytes = encodedBits
      .grouped(8)
      .map { bits => UnsignedByte.parse(bits.padTo(8, Bit.Zero)) }
      .toList

    new GolombCodedSet(
      p = p,
      m = m,
      n = words.size,
      data = encodedBytes)
  }

  /**
   * Recovers the hashes from the encoded bytes.
   *
   * This method doesn't handle corrupted inputs, which shouldn't be a problem because
   * the method is used only to verify that the filter is correct.
   *
   * @param encoded the encoded bytes, we expect them to be correct
   * @param n the number of words encoded in the bytes
   * @return the recovered list of hashes
   */
  private[gcs] def decode(encoded: List[UnsignedByte], n: Int): List[BigInt] = {
    val encodedBits = encoded.flatMap(_.bits)
    val (_, _, result) = List.fill(n)(0)
      .foldLeft((encodedBits, BigInt(0), List.empty[BigInt])) { case ((bits, acc, hashes), _) =>
        // each decoded value is the delta from the previous hash
        val (remaining, delta) = golombDecode(bits)
        val hash = acc + delta
        (remaining, hash, hash :: hashes)
      }

    // the hashes were prepended, restore the ascending order
    result.reverse
  }

  /**
   * Maps the word list to a list of hashes.
   *
   * @param words the words to hash
   * @return the hashes reduced to the range [0, m * words.length), in ascending order
   */
  private[gcs] def hashes(words: List[String]): List[BigInt] = {
    val modulus = BigInt(m) * words.length
    val f = fastReduction(_: BigInt, modulus)
    words
      .map(hash)
      .map(f)
      .sorted
  }

  /**
   * Encodes a value as the quotient (x >> p) in unary, a run of ones closed by a zero,
   * followed by the remainder (the lowest p bits of x) written with exactly p bits.
   */
  private def golombEncode(x: BigInt): List[Bit] = {
    val q = (x >> p).toInt
    val r = (x & ((1 << p) - 1)).toInt

    val qBits = List.fill[Bit](q)(Bit.One) :+ Bit.Zero
    val rBits = toBits(r, p)

    qBits ++ rBits
  }

  /**
   * Decodes a single value from the head of the given bits.
   *
   * @return the bits left after the decoded value, and the decoded value
   */
  private def golombDecode(bits: List[Bit]): (List[Bit], BigInt) = {
    val q = bits.takeWhile(_ == Bit.One).size

    // skip the unary quotient and the zero closing it
    val rBits = bits.drop(q + 1).take(p)
    val r = toBigInt(rBits)

    val x = (BigInt(q) << p) + r
    val pending = bits.drop(q + 1 + p)

    (pending, x)
  }

  /**
   * Computes the difference between each hash and the one before it, the first hash is
   * compared against zero.
   */
  private def differences(sortedHashList: List[BigInt]): List[BigInt] = {
    // pairing each hash with its predecessor also works for the empty list, while
    // sliding(2) would produce a single one-element window that fails the pattern match
    sortedHashList
      .zip(BigInt(0) :: sortedHashList)
      .map { case (current, previous) => current - previous }
  }

  /**
   * Hashes the UTF-8 bytes of the string, returning the 64-bit result as an unsigned value.
   */
  private def hash(string: String): BigInt = {
    // an explicit charset keeps the hashes independent from the platform default encoding
    val x = hasher.hashBytes(string.getBytes(java.nio.charset.StandardCharsets.UTF_8))
    BigInt(java.lang.Long.toUnsignedString(x.asLong()))
  }

  /** Interprets the bits as a number, the most significant bit goes first. */
  private def toBigInt(bits: List[Bit]): BigInt = {
    bits.foldLeft(BigInt(0)) { case (acc, cur) =>
      (acc * 2) + cur.toInt
    }
  }

  /**
   * Writes the number in binary, the most significant bit goes first, left-padded
   * with zeros up to the given size.
   */
  private def toBits(x: Long, size: Int): List[Bit] = {
    val bits = x
      .toBinaryString
      .flatMap(Bit.from)
      .toList

    List.fill(size - bits.size)(Bit.Zero) ++ bits
  }

  /**
   * Maps the value to the range [0, modulus) without using a division, assuming the value
   * fits in 64 bits, the result is the product of both numbers shifted right by 64 bits.
   */
  private def fastReduction(v: BigInt, modulus: BigInt): BigInt = {
    val nHi = modulus >> 32
    val nLo = modulus & 0xFFFFFFFFL

    // First, we'll split the item we need to reduce into its higher and lower bits.
    val vhi = v >> 32
    val vlo = v & 0xFFFFFFFFL

    // Then, we distribute multiplication over each part.
    val vnphi = vhi * nHi
    val vnpmid = vhi * nLo
    val npvmid = nHi * vlo
    val vnplo = vlo * nLo

    // We calculate the carry bit.
    val carry = ((vnpmid & 0xFFFFFFFFL) + (npvmid & 0xFFFFFFFFL) + (vnplo >> 32)) >> 32

    // Last, we add the high bits, the middle bits, and the carry.
    val result = vnphi + (vnpmid >> 32) + (npvmid >> 32) + carry

    result
  }
}
||||
|
|
||||
|
object GolombEncoding {

  /** The p parameter used by [[default]]. */
  // NOTE(review): 19 and 784931 look like the basic filter parameters from BIP-158 - confirm
  val DefaultP: Int = 19

  /** The m parameter used by [[default]]. */
  val DefaultM: Int = 784931

  /**
   * Creates an encoding with the default parameters.
   *
   * @param key the key for the hash function
   * @return an encoding using [[DefaultP]] and [[DefaultM]]
   */
  def default(key: SipHashKey): GolombEncoding =
    new GolombEncoding(DefaultP, DefaultM, key)
}
@ -0,0 +1,34 @@ |
|||||
|
package com.xsn.explorer.gcs |
||||
|
|
||||
|
/**
 * A byte interpreted as an unsigned value, from 0 to 255.
 *
 * @param byte the underlying (signed) byte
 */
class UnsignedByte(val byte: Byte) extends AnyVal {

  /** The unsigned value in decimal. */
  override def toString: String = String.valueOf(toInt)

  /** The unsigned value in binary, left-padded with zeros to exactly 8 digits. */
  def toFixedBinaryString: String = {
    val digits = Integer.toBinaryString(toInt)
    ("0" * (8 - digits.length)) + digits
  }

  /** The unsigned value, from 0 to 255. */
  def toInt: Int = java.lang.Byte.toUnsignedInt(byte)

  /** The 8 bits of the byte, the most significant bit goes first. */
  def bits: List[Bit] = {
    val value = toInt
    List.tabulate(8) { index =>
      // index 0 is the most significant bit
      val isSet = ((value >> (7 - index)) & 1) == 1
      if (isSet) Bit.One else Bit.Zero
    }
  }
}
||||
|
|
||||
|
object UnsignedByte {

  /**
   * Builds a byte from the given bits, the most significant bit goes first.
   *
   * @param bits the bits to parse, at most 8
   * @return the byte having the given bits as its lowest bits
   * @throws IllegalArgumentException if more than 8 bits are given
   */
  def parse(bits: List[Bit]): UnsignedByte = {
    require(bits.size <= 8)

    // shift the accumulated value one position and place the next bit at the end
    val value = bits
      .map(_.toInt)
      .foldLeft(0) { (accumulated, bit) => (accumulated << 1) | bit }

    new UnsignedByte(value.toByte)
  }
}
@ -0,0 +1,93 @@ |
|||||
|
package com.xsn.explorer.gcs |
||||
|
|
||||
|
import org.scalatest.{MustMatchers, WordSpec} |
||||
|
|
||||
|
/**
 * Verifies the Golomb encoding by decoding its own output, and by comparing
 * the encoded bytes against the ones produced by the btcutil gcs package.
 */
class GolombEncodingSpec extends WordSpec with MustMatchers {

  val words = List(
    "Alex", "Bob", "Charlie", "Dick", "Ed", "Frank",
    "George", "Harry", "Ilya", "John", "Kevin", "Larry",
    "Michael", "Nate", "Owen", "Paul", "Quentin"
  )

  "the encoding" should {
    // the same key used by the go program below
    val keyBytes = List(
      0x4c, 0xb1, 0xab, 0x12, 0x57, 0x62, 0x1e, 0x41,
      0x3b, 0x8b, 0x0e, 0x26, 0x64, 0x8d, 0x4a, 0x15
    ).map(_.toByte)

    val key = SipHashKey.fromBtcutil(keyBytes)
    val golomb = GolombEncoding.default(key)
    val encoded = golomb.encode(words)

    "decode the same hashes" in {
      val expected = golomb.hashes(words)
      val actual = golomb.decode(encoded.data, words.size)

      actual must be(expected)
    }

    "return the encoded hex from the btcutil gcs" in {
      /**
       * The hex was generated from this go code:
       {{{
package main

import (
  "encoding/hex"
  "fmt"
  "github.com/btcsuite/btcutil/gcs/builder"
)

func main() {
  contents := [][]byte{
    []byte("Alex"),
    []byte("Bob"),
    []byte("Charlie"),
    []byte("Dick"),
    []byte("Ed"),
    []byte("Frank"),
    []byte("George"),
    []byte("Harry"),
    []byte("Ilya"),
    []byte("John"),
    []byte("Kevin"),
    []byte("Larry"),
    []byte("Michael"),
    []byte("Nate"),
    []byte("Owen"),
    []byte("Paul"),
    []byte("Quentin"),
  }
  testKey := [16]byte{0x4c, 0xb1, 0xab, 0x12, 0x57, 0x62, 0x1e, 0x41,
    0x3b, 0x8b, 0x0e, 0x26, 0x64, 0x8d, 0x4a, 0x15}

  b := builder.WithRandomKey().SetKey(testKey);
  f, err := b.AddEntries(contents).Build();
  if err != nil {
    fmt.Println("Error", err)
  }
  rawBytes, _ := f.Bytes()
  encoded := hex.EncodeToString(rawBytes);
  fmt.Println("Filter: %X\n", len(encoded), encoded)
}
       }}}
       */
      val expected = "056ff79e6c2994ba5d91402f327f807097c5c571f8d212511a8237f005331346102b41967f35ef488406c38a88"
      encoded.hex.string mustEqual expected
    }
  }
}
Loading…
Reference in new issue