Browse Source

server: Update the GolombEncoding to use Set and SortedSet

master
Alexis Hernandez 6 years ago
parent
commit
8c0f92a07a
  1. 26
      server/app/com/xsn/explorer/gcs/GolombEncoding.scala
  2. 4
      server/test/com/xsn/explorer/gcs/GolombEncodingSpec.scala

26
server/app/com/xsn/explorer/gcs/GolombEncoding.scala

@ -2,6 +2,8 @@ package com.xsn.explorer.gcs
import com.google.common.hash.Hashing
import scala.collection.SortedSet
/**
* A Golomb-coded set matches all items in the set with probability 1, and matches other items with probability 1/M.
*
@ -17,9 +19,9 @@ class GolombEncoding(p: Int, m: Int, key: SipHashKey) {
/**
* Encodes the given word list.
*/
def encode(words: List[String]): GolombCodedSet = {
val hashList = hashes(words)
val diffList = differences(hashList)
def encode(words: Set[String]): GolombCodedSet = {
val sortedHashes = hashes(words)
val diffList = differences(sortedHashes)
val encodedBits = diffList.flatMap(golombEncode)
val encodedBytes = encodedBits
.grouped(8)
@ -41,9 +43,9 @@ class GolombEncoding(p: Int, m: Int, key: SipHashKey) {
*
* @param encoded the encoded bytes; we expect them to be correct
* @param n the number of words encoded in the bytes
* @return the recovered list of hashes
* @return the recovered sorted set of hashes
*/
private[gcs] def decode(encoded: List[UnsignedByte], n: Int): List[BigInt] = {
private[gcs] def decode(encoded: List[UnsignedByte], n: Int): SortedSet[BigInt] = {
val encodedBits = encoded.flatMap(_.bits)
val (_, _, result) = List.fill(n)(0)
.foldLeft((encodedBits, BigInt(0), List.empty[BigInt])) { case ((bits, acc, hashes), _) =>
@ -52,19 +54,19 @@ class GolombEncoding(p: Int, m: Int, key: SipHashKey) {
(remaining, hash, hash :: hashes)
}
result.reverse
result.to[SortedSet]
}
/**
* Maps the word list to a list of hashes.
* Maps the word set to a sorted set of hashes.
*/
private[gcs] def hashes(words: List[String]): List[BigInt] = {
val modulus = BigInt(m) * words.length
private[gcs] def hashes(words: Set[String]): SortedSet[BigInt] = {
val modulus = BigInt(m) * words.size
val f = fastReduction(_: BigInt, modulus)
words
.map(hash)
.map(f)
.sorted
.to[SortedSet]
}
private def golombEncode(x: BigInt): List[Bit] = {
@ -88,8 +90,8 @@ class GolombEncoding(p: Int, m: Int, key: SipHashKey) {
(pending, x)
}
private def differences(sortedHashList: List[BigInt]): List[BigInt] = {
(BigInt(0) :: sortedHashList)
private def differences(sortedHashes: SortedSet[BigInt]): List[BigInt] = {
(BigInt(0) :: sortedHashes.toList)
.sliding(2)
.map { case a :: b :: Nil => b - a }
.toList

4
server/test/com/xsn/explorer/gcs/GolombEncodingSpec.scala

@ -32,10 +32,10 @@ class GolombEncodingSpec extends WordSpec with MustMatchers {
val key = SipHashKey.fromBtcutil(keyBytes)
val golomb = GolombEncoding.default(key)
val encoded = golomb.encode(words)
val encoded = golomb.encode(words.toSet)
"decode the same hashes" in {
val hashes = golomb.hashes(words)
val hashes = golomb.hashes(words.toSet)
val bytes = BaseEncoding
.base16()
.decode(encoded.hex.string.toUpperCase)

Loading…
Cancel
Save