Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 48 additions & 24 deletions roaring.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,28 +223,24 @@ func (rb *Bitmap) WriteDenseTo(bitmap []uint64) {
}
}

// Checksum computes a hash (currently FNV-1a) for a bitmap that is suitable for
// Checksum computes a hash (FNV-1a) for a bitmap that is suitable for
// using bitmaps as elements in hash sets or as keys in hash maps, as well as
// generally quicker comparisons.
// The implementation is biased towards efficiency in little endian machines, so
// expect some extra CPU cycles and memory to be used if your machine is big endian.
// Likewise, do not use this to verify integrity unless you are certain you will load
// the bitmap on a machine with the same endianness used to create it. (Thankfully
// very few people use big endian machines these days.)
// generally quick comparisons.
func (rb *Bitmap) Checksum() uint64 {
const (
offset = 14695981039346656037
prime = 1099511628211
)

var bytes []byte

hash := uint64(offset)

bytes = uint16SliceAsByteSlice(rb.highlowcontainer.keys)

for _, b := range bytes {
hash ^= uint64(b)
// Hash the keys (uint16 slice) directly
for _, key := range rb.highlowcontainer.keys {
// Hash low byte first (little endian)
hash ^= uint64(key & 0xFF)
hash *= prime
// Hash high byte
hash ^= uint64(key >> 8)
hash *= prime
}

Expand All @@ -255,23 +251,51 @@ func (rb *Bitmap) Checksum() uint64 {

switch c := c.(type) {
case *bitmapContainer:
bytes = uint64SliceAsByteSlice(c.bitmap)
for _, val := range c.bitmap {
// Hash in little-endian byte order (unrolled loop)
hash ^= uint64(val & 0xFF)
hash *= prime
hash ^= uint64((val >> 8) & 0xFF)
hash *= prime
hash ^= uint64((val >> 16) & 0xFF)
hash *= prime
hash ^= uint64((val >> 24) & 0xFF)
hash *= prime
hash ^= uint64((val >> 32) & 0xFF)
hash *= prime
hash ^= uint64((val >> 40) & 0xFF)
hash *= prime
hash ^= uint64((val >> 48) & 0xFF)
hash *= prime
hash ^= uint64((val >> 56) & 0xFF)
hash *= prime
}
case *arrayContainer:
bytes = uint16SliceAsByteSlice(c.content)
for _, val := range c.content {
// Hash low byte first (little endian)
hash ^= uint64(val & 0xFF)
hash *= prime
// Hash high byte
hash ^= uint64(val >> 8)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be `val >> 8 & 0xFF`? I guess it doesn't matter as it's a uint16. It was just visually noticeable compared to the block above.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is OK because the semantics are clear.

hash *= prime
}
case *runContainer16:
bytes = interval16SliceAsByteSlice(c.iv)
for _, iv := range c.iv {
// Hash start (uint16)
hash ^= uint64(iv.start & 0xFF)
hash *= prime
hash ^= uint64(iv.start >> 8)
hash *= prime
// Hash length (uint16)
hash ^= uint64(iv.length & 0xFF)
hash *= prime
hash ^= uint64(iv.length >> 8)
hash *= prime
}
default:
panic("invalid container type")
}

if len(bytes) == 0 {
panic("empty containers are not supported")
}

for _, b := range bytes {
hash ^= uint64(b)
hash *= prime
}
}

return hash
Expand Down
164 changes: 164 additions & 0 deletions roaring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,162 @@ func checkValidity(t *testing.T, rb *Bitmap) {
}

func hashTest(t *testing.T, N uint64) {
// Expected checksums for reproducible testing
expectedChecksums := map[uint64]map[uint64]map[int]uint64{
15: {
1: {0: 10756319042591085143, 1: 8628356015285516814},
2: {0: 9098333059944862327, 1: 8643661217147186334},
4: {0: 6958029410146852663, 1: 8552786581093523559},
8: {0: 1468016395925628727, 1: 17120221891265014711},
16: {0: 10544680338284849079, 1: 10276365928919663543},
32: {0: 16946816020221683671, 1: 15774941559772765735},
64: {0: 6805685596090222999, 1: 14027082878079462903},
128: {0: 10938873449103961751, 1: 15444148319012625847},
256: {0: 11309310771509366551, 1: 7069335405671456567},
512: {0: 12541562025061152055, 1: 16261505930335083639},
1024: {0: 2339435738231874871, 1: 10952176602620091831},
2048: {0: 3316768049318457015, 1: 17316440206395307191},
4096: {0: 7701764264875653559, 1: 14832271157678859191},
8192: {0: 16988454899365713580, 1: 10726883116002431148},
16384: {0: 2090028183478809173, 1: 9796457766245620565},
32768: {0: 7760976824469153733, 1: 12282386713985239749},
65536: {0: 981896277892437765, 1: 11087826385241811525},
},
100: {
1: {0: 9066279125779985483, 1: 8577657534118736529},
2: {0: 4369425401968277239, 1: 8664705869706981924},
4: {0: 11010263968690220268, 1: 8747926805318181728},
8: {0: 10633650503763838178, 1: 7914439743116618412},
16: {0: 2103672447757413801, 1: 17045885344622844731},
32: {0: 12393679577414425883, 1: 6955536801820872561},
64: {0: 8197316132833680708, 1: 8636063068238530422},
128: {0: 7231439595719530925, 1: 15483402246458171763},
256: {0: 16279390471979224259, 1: 10865940656713619409},
512: {0: 18178650019838088967, 1: 3561060448163512986},
1024: {0: 14129223433416501692, 1: 7566389309386577368},
2048: {0: 7894714367708308029, 1: 14322377208770974283},
4096: {0: 6808097750982305134, 1: 15258365281783128036},
8192: {0: 5220938908514583451, 1: 7907615236815205617},
16384: {0: 7801599049027983452, 1: 15366568190157828206},
32768: {0: 12138675384054307626, 1: 1639002974230284776},
65536: {0: 14890641509296776915, 1: 4669671005208206569},
},
512: {
1: {0: 827073066654895541, 1: 8612092039284236527},
2: {0: 12579948327828555139, 1: 8611133265144625760},
4: {0: 13237566687897418215, 1: 8610189884167809947},
8: {0: 1963122788668779039, 1: 11510392801384632065},
16: {0: 16746247274296044527, 1: 13465507099452589101},
32: {0: 825734909322010511, 1: 12319660855737761317},
64: {0: 9549528494128535247, 1: 14223792047697043957},
128: {0: 4041381740454103948, 1: 6750203723185038846},
256: {0: 7479480802351225402, 1: 15120562265861553528},
512: {0: 10226096245326616211, 1: 2123185684866484358},
1024: {0: 11147571132137264071, 1: 1491568024944778791},
2048: {0: 13981076381879476127, 1: 9988660227473270361},
4096: {0: 11371001616995089391, 1: 6746885678345436557},
8192: {0: 13391729850604800783, 1: 8278994428803645669},
16384: {0: 16904558478699170767, 1: 18317580384194851125},
32768: {0: 9632691026248850622, 1: 7001884646453199196},
65536: {0: 5681615685697371437, 1: 9355850228005660679},
},
1023: {
1: {0: 8631198422780375735, 1: 8613055211470360138},
2: {0: 5320069948751931063, 1: 8613042017330821606},
4: {0: 17410506490660662967, 1: 8613989796354150263},
8: {0: 5518796129031940535, 1: 746402130471049399},
16: {0: 14798031165738763703, 1: 12612339809927874231},
32: {0: 15315673636708793783, 1: 9532837701920862135},
64: {0: 7522839260639306167, 1: 2823706096559924663},
128: {0: 9587520344641008812, 1: 7726041297071629484},
256: {0: 17155103986506795605, 1: 6313321014394915413},
512: {0: 12175764683182123845, 1: 15506743299236676933},
1024: {0: 3571090491137291397, 1: 10578105683313814149},
2048: {0: 2237728790313941541, 1: 1717755092268207653},
4096: {0: 9686908407091041829, 1: 11237119237887424037},
8192: {0: 1820574113220693541, 1: 6255563087371431461},
16384: {0: 4282238745751428901, 1: 16052809024863665957},
32768: {0: 4757895643122848293, 1: 11441606648169673253},
65536: {0: 13049746141697269285, 1: 16352630024729701925},
},
1025: {
1: {0: 16082706216882239750, 1: 8611133265144625760},
2: {0: 12372593145282835221, 1: 8609215716865404226},
4: {0: 16585165836803014019, 1: 8606372379795418255},
8: {0: 10608955794586209375, 1: 56703568294819583},
16: {0: 8276728750771033399, 1: 8063557333063131655},
32: {0: 10794409826319712711, 1: 10503624278129685111},
64: {0: 18176085198269729708, 1: 8204216864690730060},
128: {0: 17952727548789690314, 1: 12359753801590897482},
256: {0: 14990418481054615768, 1: 17958305146627430476},
512: {0: 4875534555349427013, 1: 17195640123067758377},
1024: {0: 5974583595409151659, 1: 8978053391731676363},
2048: {0: 3044574169068517039, 1: 14966468005696446975},
4096: {0: 691405335039223607, 1: 639128391183175735},
8192: {0: 4038113400756120263, 1: 12825785820597459191},
16384: {0: 1707028181932787630, 1: 12526547269329962670},
32768: {0: 5628393221510929589, 1: 6322935199976410277},
65536: {0: 14552713712975952780, 1: 14116387162697418672},
},
4095: {
1: {0: 16949218234402177463, 1: 8613068405609898670},
2: {0: 6830997255050832823, 1: 8613085997795950046},
4: {0: 3431046584319368119, 1: 8614077757284407143},
8: {0: 10577784705589702583, 1: 11227064421971279799},
16: {0: 13582283525559942071, 1: 8280698677262929847},
32: {0: 15930731229079543980, 1: 14540819050448025772},
64: {0: 15074197189812092501, 1: 8183429771446961749},
128: {0: 16353918313334514501, 1: 8211290591646809413},
256: {0: 17541269277318240901, 1: 15870350090812465285},
512: {0: 2886944576023397925, 1: 836005010297672229},
1024: {0: 2513825157712442917, 1: 4097762838503421477},
2048: {0: 7990320838034419237, 1: 2629680369156025893},
4096: {0: 15715908757492956965, 1: 5113759417806159653},
8192: {0: 8124527689922589221, 1: 9297845031996404261},
16384: {0: 4269569377294204453, 1: 4817984819978255909},
32768: {0: 6778708389721242917, 1: 6948029477167889701},
65536: {0: 7213964021817199397, 1: 1491414404504750885},
},
4096: {
1: {0: 11204821770578667958, 1: 8612076646121441573},
2: {0: 4002267072615482294, 1: 8611102478819035852},
4: {0: 4410347288001337270, 1: 8610110719330578755},
8: {0: 8603373155648831414, 1: 2315836927982075848},
16: {0: 16944482119189024652, 1: 4822482652523786870},
32: {0: 6928244183881028666, 1: 10404343182114103400},
64: {0: 3024916132362050707, 1: 18344738716466403161},
128: {0: 3834944388080394183, 1: 11965577797857582925},
256: {0: 12578054101381780383, 1: 6432236967053365413},
512: {0: 8964060272711867887, 1: 4932377867961823298},
1024: {0: 10128075423705193231, 1: 13478118364967442607},
2048: {0: 10873419466579909583, 1: 2115085958593098761},
4096: {0: 4070443640866143422, 1: 4961060556868877236},
8192: {0: 18035946153837538605, 1: 2745127392817860183},
16384: {0: 10440079310404823051, 1: 17449655376855997137},
32768: {0: 3643662661208167111, 1: 8040054815272426829},
65536: {0: 10935678078244034623, 1: 9483227545069674309},
},
4097: {
1: {0: 12502862512299620788, 1: 8611155255377189980},
2: {0: 9181307251578323890, 1: 8609189328586327162},
4: {0: 2746848181595101094, 1: 8606425156353572383},
8: {0: 16463913002427077937, 1: 4531009983748580157},
16: {0: 5620696034248496124, 1: 1681598425519654588},
32: {0: 14741226801656710698, 1: 15129443590117138858},
64: {0: 6054465815016208075, 1: 17167720527236979867},
128: {0: 16740558302224955231, 1: 6094338199568812847},
256: {0: 16186283226139238340, 1: 9873750830371717112},
512: {0: 17128678634972789677, 1: 17686674735082264593},
1024: {0: 4584566501618899867, 1: 1571778330094025403},
2048: {0: 14362076614095467407, 1: 18052295794438597855},
4096: {0: 14068496717840570686, 1: 15528345011722562094},
8192: {0: 11973549300238445781, 1: 4445961311069353765},
16384: {0: 17875326558828238723, 1: 2623816128707679187},
32768: {0: 16383141035864297567, 1: 6192384169366743215},
65536: {0: 9434099907678191244, 1: 12315506264659309232},
},
}

hashes := map[uint64]struct{}{}
count := 0

Expand All @@ -362,6 +518,10 @@ func hashTest(t *testing.T, N uint64) {
rb2.AddInt(int(x))
}

checksum := rb1.Checksum()
expected := expectedChecksums[N][gap][0]
assert.EqualValues(t, expected, checksum, "N=%d, gap=%d, part 1", N, gap)

assert.EqualValues(t, rb1.Checksum(), rb2.Checksum())
count++
hashes[rb1.Checksum()] = struct{}{}
Expand All @@ -379,6 +539,10 @@ func hashTest(t *testing.T, N uint64) {
rb1.RunOptimize()
rb2.RunOptimize()

checksum = rb1.Checksum()
expected = expectedChecksums[N][gap][1]
assert.EqualValues(t, expected, checksum, "N=%d, gap=%d, part 2", N, gap)

assert.EqualValues(t, rb1.Checksum(), rb2.Checksum())
count++
hashes[rb1.Checksum()] = struct{}{}
Expand Down
Loading