From e96273f4c2022a94baee11bf646e7e920934f005 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 25 Nov 2025 23:39:02 -0500 Subject: [PATCH] simplify checksum --- roaring.go | 72 ++++++++++++++------- roaring_test.go | 164 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+), 24 deletions(-) diff --git a/roaring.go b/roaring.go index f511539a..9c04b27b 100644 --- a/roaring.go +++ b/roaring.go @@ -223,28 +223,24 @@ func (rb *Bitmap) WriteDenseTo(bitmap []uint64) { } } -// Checksum computes a hash (currently FNV-1a) for a bitmap that is suitable for +// Checksum computes a hash (FNV-1a) for a bitmap that is suitable for // using bitmaps as elements in hash sets or as keys in hash maps, as well as -// generally quicker comparisons. -// The implementation is biased towards efficiency in little endian machines, so -// expect some extra CPU cycles and memory to be used if your machine is big endian. -// Likewise, do not use this to verify integrity unless you are certain you will load -// the bitmap on a machine with the same endianess used to create it. (Thankfully -// very few people use big endian machines these days.) +// generally quick comparisons. func (rb *Bitmap) Checksum() uint64 { const ( offset = 14695981039346656037 prime = 1099511628211 ) - var bytes []byte - hash := uint64(offset) - bytes = uint16SliceAsByteSlice(rb.highlowcontainer.keys) - - for _, b := range bytes { - hash ^= uint64(b) + // Hash the keys (uint16 slice) directly + for _, key := range rb.highlowcontainer.keys { + // Hash low byte first (little endian) + hash ^= uint64(key & 0xFF) + hash *= prime + // Hash high byte + hash ^= uint64(key >> 8) hash *= prime } @@ -255,23 +251,51 @@ func (rb *Bitmap) Checksum() uint64 { switch c := c.(type) { case *bitmapContainer: - bytes = uint64SliceAsByteSlice(c.bitmap) + for _, val := range c.bitmap { + // Hash in little-endian byte order (unrolled loop) + hash ^= uint64(val & 0xFF) + hash *= prime + hash ^= uint64((val >> 8) & 0xFF) + hash *= prime + hash ^= uint64((val >> 16) & 0xFF) + hash *= prime + hash ^= uint64((val >> 24) & 0xFF) + hash *= prime + hash ^= uint64((val >> 32) & 0xFF) + hash *= prime + hash ^= uint64((val >> 40) & 0xFF) + hash *= prime + hash ^= uint64((val >> 48) & 0xFF) + hash *= prime + hash ^= uint64((val >> 56) & 0xFF) + hash *= prime + } case *arrayContainer: - bytes = uint16SliceAsByteSlice(c.content) + for _, val := range c.content { + // Hash low byte first (little endian) + hash ^= uint64(val & 0xFF) + hash *= prime + // Hash high byte + hash ^= uint64(val >> 8) + hash *= prime + } case *runContainer16: - bytes = interval16SliceAsByteSlice(c.iv) + for _, iv := range c.iv { + // Hash start (uint16) + hash ^= uint64(iv.start & 0xFF) + hash *= prime + hash ^= uint64(iv.start >> 8) + hash *= prime + // Hash length (uint16) + hash ^= uint64(iv.length & 0xFF) + hash *= prime + hash ^= uint64(iv.length >> 8) + hash *= prime + } default: panic("invalid container type") } - if len(bytes) == 0 { - panic("empty containers are not supported") - } - - for _, b := range bytes { - hash ^= uint64(b) - hash *= prime - } } return hash diff --git a/roaring_test.go b/roaring_test.go index cd2dcd30..26fd64ac 100644 --- a/roaring_test.go +++ b/roaring_test.go @@ -352,6 +352,162 @@ func checkValidity(t *testing.T, rb *Bitmap) { } func hashTest(t *testing.T, N uint64) { + // Expected checksums for reproducible testing + expectedChecksums := map[uint64]map[uint64]map[int]uint64{ + 15: { + 1: {0: 10756319042591085143, 1: 8628356015285516814}, + 2: {0: 9098333059944862327, 1: 8643661217147186334}, + 4: {0: 6958029410146852663, 1: 8552786581093523559}, + 8: {0: 1468016395925628727, 1: 17120221891265014711}, + 16: {0: 10544680338284849079, 1: 10276365928919663543}, + 32: {0: 16946816020221683671, 1: 15774941559772765735}, + 64: {0: 6805685596090222999, 1: 14027082878079462903}, + 128: {0: 10938873449103961751, 1: 15444148319012625847}, + 256: {0: 11309310771509366551, 1: 7069335405671456567}, + 512: {0: 12541562025061152055, 1: 16261505930335083639}, + 1024: {0: 2339435738231874871, 1: 10952176602620091831}, + 2048: {0: 3316768049318457015, 1: 17316440206395307191}, + 4096: {0: 7701764264875653559, 1: 14832271157678859191}, + 8192: {0: 16988454899365713580, 1: 10726883116002431148}, + 16384: {0: 2090028183478809173, 1: 9796457766245620565}, + 32768: {0: 7760976824469153733, 1: 12282386713985239749}, + 65536: {0: 981896277892437765, 1: 11087826385241811525}, + }, + 100: { + 1: {0: 9066279125779985483, 1: 8577657534118736529}, + 2: {0: 4369425401968277239, 1: 8664705869706981924}, + 4: {0: 11010263968690220268, 1: 8747926805318181728}, + 8: {0: 10633650503763838178, 1: 7914439743116618412}, + 16: {0: 2103672447757413801, 1: 17045885344622844731}, + 32: {0: 12393679577414425883, 1: 6955536801820872561}, + 64: {0: 8197316132833680708, 1: 8636063068238530422}, + 128: {0: 7231439595719530925, 1: 15483402246458171763}, + 256: {0: 16279390471979224259, 1: 10865940656713619409}, + 512: {0: 18178650019838088967, 1: 3561060448163512986}, + 1024: {0: 14129223433416501692, 1: 7566389309386577368}, + 2048: {0: 7894714367708308029, 1: 14322377208770974283}, + 4096: {0: 6808097750982305134, 1: 15258365281783128036}, + 8192: {0: 5220938908514583451, 1: 7907615236815205617}, + 16384: {0: 7801599049027983452, 1: 15366568190157828206}, + 32768: {0: 12138675384054307626, 1: 1639002974230284776}, + 65536: {0: 14890641509296776915, 1: 4669671005208206569}, + }, + 512: { + 1: {0: 827073066654895541, 1: 8612092039284236527}, + 2: {0: 12579948327828555139, 1: 8611133265144625760}, + 4: {0: 13237566687897418215, 1: 8610189884167809947}, + 8: {0: 1963122788668779039, 1: 11510392801384632065}, + 16: {0: 16746247274296044527, 1: 13465507099452589101}, + 32: {0: 825734909322010511, 1: 12319660855737761317}, + 64: {0: 9549528494128535247, 1: 14223792047697043957}, + 128: {0: 4041381740454103948, 1: 6750203723185038846}, + 256: {0: 7479480802351225402, 1: 15120562265861553528}, + 512: {0: 10226096245326616211, 1: 2123185684866484358}, + 1024: {0: 11147571132137264071, 1: 1491568024944778791}, + 2048: {0: 13981076381879476127, 1: 9988660227473270361}, + 4096: {0: 11371001616995089391, 1: 6746885678345436557}, + 8192: {0: 13391729850604800783, 1: 8278994428803645669}, + 16384: {0: 16904558478699170767, 1: 18317580384194851125}, + 32768: {0: 9632691026248850622, 1: 7001884646453199196}, + 65536: {0: 5681615685697371437, 1: 9355850228005660679}, + }, + 1023: { + 1: {0: 8631198422780375735, 1: 8613055211470360138}, + 2: {0: 5320069948751931063, 1: 8613042017330821606}, + 4: {0: 17410506490660662967, 1: 8613989796354150263}, + 8: {0: 5518796129031940535, 1: 746402130471049399}, + 16: {0: 14798031165738763703, 1: 12612339809927874231}, + 32: {0: 15315673636708793783, 1: 9532837701920862135}, + 64: {0: 7522839260639306167, 1: 2823706096559924663}, + 128: {0: 9587520344641008812, 1: 7726041297071629484}, + 256: {0: 17155103986506795605, 1: 6313321014394915413}, + 512: {0: 12175764683182123845, 1: 15506743299236676933}, + 1024: {0: 3571090491137291397, 1: 10578105683313814149}, + 2048: {0: 2237728790313941541, 1: 1717755092268207653}, + 4096: {0: 9686908407091041829, 1: 11237119237887424037}, + 8192: {0: 1820574113220693541, 1: 6255563087371431461}, + 16384: {0: 4282238745751428901, 1: 16052809024863665957}, + 32768: {0: 4757895643122848293, 1: 11441606648169673253}, + 65536: {0: 13049746141697269285, 1: 16352630024729701925}, + }, + 1025: { + 1: {0: 16082706216882239750, 1: 8611133265144625760}, + 2: {0: 12372593145282835221, 1: 8609215716865404226}, + 4: {0: 16585165836803014019, 1: 8606372379795418255}, + 8: {0: 10608955794586209375, 1: 56703568294819583}, + 16: {0: 8276728750771033399, 1: 8063557333063131655}, + 32: {0: 10794409826319712711, 1: 10503624278129685111}, + 64: {0: 18176085198269729708, 1: 8204216864690730060}, + 128: {0: 17952727548789690314, 1: 12359753801590897482}, + 256: {0: 14990418481054615768, 1: 17958305146627430476}, + 512: {0: 4875534555349427013, 1: 17195640123067758377}, + 1024: {0: 5974583595409151659, 1: 8978053391731676363}, + 2048: {0: 3044574169068517039, 1: 14966468005696446975}, + 4096: {0: 691405335039223607, 1: 639128391183175735}, + 8192: {0: 4038113400756120263, 1: 12825785820597459191}, + 16384: {0: 1707028181932787630, 1: 12526547269329962670}, + 32768: {0: 5628393221510929589, 1: 6322935199976410277}, + 65536: {0: 14552713712975952780, 1: 14116387162697418672}, + }, + 4095: { + 1: {0: 16949218234402177463, 1: 8613068405609898670}, + 2: {0: 6830997255050832823, 1: 8613085997795950046}, + 4: {0: 3431046584319368119, 1: 8614077757284407143}, + 8: {0: 10577784705589702583, 1: 11227064421971279799}, + 16: {0: 13582283525559942071, 1: 8280698677262929847}, + 32: {0: 15930731229079543980, 1: 14540819050448025772}, + 64: {0: 15074197189812092501, 1: 8183429771446961749}, + 128: {0: 16353918313334514501, 1: 8211290591646809413}, + 256: {0: 17541269277318240901, 1: 15870350090812465285}, + 512: {0: 2886944576023397925, 1: 836005010297672229}, + 1024: {0: 2513825157712442917, 1: 4097762838503421477}, + 2048: {0: 7990320838034419237, 1: 2629680369156025893}, + 4096: {0: 15715908757492956965, 1: 5113759417806159653}, + 8192: {0: 8124527689922589221, 1: 9297845031996404261}, + 16384: {0: 4269569377294204453, 1: 4817984819978255909}, + 32768: {0: 6778708389721242917, 1: 6948029477167889701}, + 65536: {0: 7213964021817199397, 1: 1491414404504750885}, + }, + 4096: { + 1: {0: 11204821770578667958, 1: 8612076646121441573}, + 2: {0: 4002267072615482294, 1: 8611102478819035852}, + 4: {0: 4410347288001337270, 1: 8610110719330578755}, + 8: {0: 8603373155648831414, 1: 2315836927982075848}, + 16: {0: 16944482119189024652, 1: 4822482652523786870}, + 32: {0: 6928244183881028666, 1: 10404343182114103400}, + 64: {0: 3024916132362050707, 1: 18344738716466403161}, + 128: {0: 3834944388080394183, 1: 11965577797857582925}, + 256: {0: 12578054101381780383, 1: 6432236967053365413}, + 512: {0: 8964060272711867887, 1: 4932377867961823298}, + 1024: {0: 10128075423705193231, 1: 13478118364967442607}, + 2048: {0: 10873419466579909583, 1: 2115085958593098761}, + 4096: {0: 4070443640866143422, 1: 4961060556868877236}, + 8192: {0: 18035946153837538605, 1: 2745127392817860183}, + 16384: {0: 10440079310404823051, 1: 17449655376855997137}, + 32768: {0: 3643662661208167111, 1: 8040054815272426829}, + 65536: {0: 10935678078244034623, 1: 9483227545069674309}, + }, + 4097: { + 1: {0: 12502862512299620788, 1: 8611155255377189980}, + 2: {0: 9181307251578323890, 1: 8609189328586327162}, + 4: {0: 2746848181595101094, 1: 8606425156353572383}, + 8: {0: 16463913002427077937, 1: 4531009983748580157}, + 16: {0: 5620696034248496124, 1: 1681598425519654588}, + 32: {0: 14741226801656710698, 1: 15129443590117138858}, + 64: {0: 6054465815016208075, 1: 17167720527236979867}, + 128: {0: 16740558302224955231, 1: 6094338199568812847}, + 256: {0: 16186283226139238340, 1: 9873750830371717112}, + 512: {0: 17128678634972789677, 1: 17686674735082264593}, + 1024: {0: 4584566501618899867, 1: 1571778330094025403}, + 2048: {0: 14362076614095467407, 1: 18052295794438597855}, + 4096: {0: 14068496717840570686, 1: 15528345011722562094}, + 8192: {0: 11973549300238445781, 1: 4445961311069353765}, + 16384: {0: 17875326558828238723, 1: 2623816128707679187}, + 32768: {0: 16383141035864297567, 1: 6192384169366743215}, + 65536: {0: 9434099907678191244, 1: 12315506264659309232}, + }, + } + hashes := map[uint64]struct{}{} count := 0 @@ -362,6 +518,10 @@ func hashTest(t *testing.T, N uint64) { rb2.AddInt(int(x)) } + checksum := rb1.Checksum() + expected := expectedChecksums[N][gap][0] + assert.EqualValues(t, expected, checksum, "N=%d, gap=%d, part 1", N, gap) + assert.EqualValues(t, rb1.Checksum(), rb2.Checksum()) count++ hashes[rb1.Checksum()] = struct{}{} @@ -379,6 +539,10 @@ func hashTest(t *testing.T, N uint64) { rb1.RunOptimize() rb2.RunOptimize() + checksum = rb1.Checksum() + expected = expectedChecksums[N][gap][1] + assert.EqualValues(t, expected, checksum, "N=%d, gap=%d, part 2", N, gap) + assert.EqualValues(t, rb1.Checksum(), rb2.Checksum()) count++ hashes[rb1.Checksum()] = struct{}{}