From 9ebb1c38d80cfeda13ac84b9705cf0c6087b1202 Mon Sep 17 00:00:00 2001 From: Roger Peppe Date: Fri, 7 Nov 2025 18:11:04 +0000 Subject: [PATCH 1/2] more efficient UnsetIterator implementation Also add some property-based tests with a small corpus of values. Signed-off-by: Roger Peppe --- arraycontainer.go | 52 ++++++++ benchmark_test.go | 16 +++ bitmapcontainer.go | 56 +++++++++ property_test.go | 287 +++++++++++++++++++++++++++++++++++++++++++++ roaring.go | 216 +++++++++++++++++++++++----------- roaringarray.go | 1 + runcontainer.go | 55 +++++++++ 7 files changed, 615 insertions(+), 68 deletions(-) create mode 100644 property_test.go diff --git a/arraycontainer.go b/arraycontainer.go index 32ebc6cd..a47c173a 100644 --- a/arraycontainer.go +++ b/arraycontainer.go @@ -62,6 +62,58 @@ func (ac *arrayContainer) getManyIterator() manyIterable { return &shortIterator{ac.content, 0} } +type arrayContainerUnsetIterator struct { + content []uint16 + pos int + nextVal int +} + +func (acui *arrayContainerUnsetIterator) next() uint16 { + val := acui.nextVal + acui.nextVal++ + for acui.pos < len(acui.content) && uint16(acui.nextVal) == acui.content[acui.pos] { + acui.nextVal++ + acui.pos++ + } + return uint16(val) +} + +func (acui *arrayContainerUnsetIterator) hasNext() bool { + return acui.nextVal < 65536 +} + +func (acui *arrayContainerUnsetIterator) peekNext() uint16 { + return uint16(acui.nextVal) +} + +func (acui *arrayContainerUnsetIterator) advanceIfNeeded(minval uint16) { + if !acui.hasNext() || acui.peekNext() >= minval { + return + } + acui.nextVal = int(minval) + acui.pos = binarySearch(acui.content, minval) + if acui.pos < 0 { + acui.pos = -acui.pos - 1 + } + for acui.pos < len(acui.content) && uint16(acui.nextVal) == acui.content[acui.pos] { + acui.nextVal++ + acui.pos++ + } +} + +func newArrayContainerUnsetIterator(a *arrayContainer) *arrayContainerUnsetIterator { + acui := &arrayContainerUnsetIterator{content: a.content, pos: 0, nextVal: 0} + for acui.pos < 
len(acui.content) && uint16(acui.nextVal) == acui.content[acui.pos] { + acui.nextVal++ + acui.pos++ + } + return acui +} + +func (ac *arrayContainer) getUnsetIterator() shortPeekable { + return newArrayContainerUnsetIterator(ac) +} + func (ac *arrayContainer) minimum() uint16 { return ac.content[0] // assume not empty } diff --git a/benchmark_test.go b/benchmark_test.go index b4dd928e..d54d86e0 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -600,6 +600,22 @@ func BenchmarkIterateRoaring(b *testing.B) { }) } }) + b.Run("unsetIterator", func(b *testing.B) { + b.ReportAllocs() + + s := Flip(newBitmap(), 0, 0x100000000) + + b.ResetTimer() + + for j := 0; j < b.N; j++ { + c9 = uint(0) + i := s.UnsetIterator(0, 0xffffffff) + for i.HasNext() { + i.Next() + c9++ + } + } + }) } // go test -bench BenchmarkSparseIterate -run - diff --git a/bitmapcontainer.go b/bitmapcontainer.go index 10bc0f1c..89682092 100644 --- a/bitmapcontainer.go +++ b/bitmapcontainer.go @@ -262,6 +262,39 @@ func (bc *bitmapContainer) getManyIterator() manyIterable { return newBitmapContainerManyIterator(bc) } +type bitmapContainerUnsetIterator struct { + ptr *bitmapContainer + i int +} + +func (bcui *bitmapContainerUnsetIterator) next() uint16 { + j := bcui.i + bcui.i = bcui.ptr.NextUnsetBit(uint(bcui.i) + 1) + return uint16(j) +} + +func (bcui *bitmapContainerUnsetIterator) hasNext() bool { + return bcui.i >= 0 && bcui.i < 65536 +} + +func (bcui *bitmapContainerUnsetIterator) peekNext() uint16 { + return uint16(bcui.i) +} + +func (bcui *bitmapContainerUnsetIterator) advanceIfNeeded(minval uint16) { + if bcui.hasNext() && bcui.peekNext() < minval { + bcui.i = bcui.ptr.NextUnsetBit(uint(minval)) + } +} + +func newBitmapContainerUnsetIterator(a *bitmapContainer) *bitmapContainerUnsetIterator { + return &bitmapContainerUnsetIterator{a, a.NextUnsetBit(0)} +} + +func (bc *bitmapContainer) getUnsetIterator() shortPeekable { + return newBitmapContainerUnsetIterator(bc) +} + func (bc 
*bitmapContainer) getSizeInBytes() int { return len(bc.bitmap) * 8 } @@ -1113,6 +1146,29 @@ func (bc *bitmapContainer) NextSetBit(i uint) int { return -1 } +func (bc *bitmapContainer) NextUnsetBit(i uint) int { + var ( + x = i / 64 + length = uint(len(bc.bitmap)) + ) + if x >= length { + return int(i) + } + w := bc.bitmap[x] + w = w >> uint(i%64) + w = ^w + if w != 0 { + return int(i) + countTrailingZeros(w) + } + x++ + for ; x < length; x++ { + if bc.bitmap[x] != 0xFFFFFFFFFFFFFFFF { + return int(x*64) + countTrailingZeros(^bc.bitmap[x]) + } + } + return int(length * 64) +} + // PrevSetBit returns the previous set bit e.g the previous int packed into the bitmaparray func (bc *bitmapContainer) PrevSetBit(i int) int { if i < 0 { diff --git a/property_test.go b/property_test.go new file mode 100644 index 00000000..09961e76 --- /dev/null +++ b/property_test.go @@ -0,0 +1,287 @@ +package roaring + +import ( + "fmt" + "math/rand" + "testing" +) + +// TestBitmapProperties runs all invariants against all bitmaps in the corpus +func TestBitmapProperties(t *testing.T) { + corpus := getBitmapCorpus() + invariants := getInvariants() + + for _, gen := range corpus { + for _, inv := range invariants { + t.Run(fmt.Sprintf("%s/%s", gen.name, inv.name), func(t *testing.T) { + b := gen.gen() + inv.test(t, b) + }) + } + } +} + +// TestBitmapPropertiesWithRunOptimize tests all invariants on RunOptimize'd bitmaps +func TestBitmapPropertiesWithRunOptimize(t *testing.T) { + corpus := getBitmapCorpus() + invariants := getInvariants() + + for _, gen := range corpus { + for _, inv := range invariants { + t.Run(fmt.Sprintf("%s/%s_optimized", gen.name, inv.name), func(t *testing.T) { + b := gen.gen() + b.RunOptimize() + inv.test(t, b) + }) + } + } +} + +// bitmapGenerator is a function that creates a test bitmap +type bitmapGenerator struct { + name string + gen func() *Bitmap +} + +// invariant is a property that should hold for all bitmaps +type invariant struct { + name string + test 
func(t *testing.T, b *Bitmap) +} + +// getInvariants returns all property invariants to test +func getInvariants() []invariant { + return []invariant{ + {name: "doubleflip", test: doubleFlipInvariant}, + {name: "iteratorbits", test: iteratorBitsInvariant}, + {name: "unsetiteratorbits", test: unsetIteratorBitsInvariant}, + } +} + +// doubleFlipInvariant checks that flip(flip(b)) == b +func doubleFlipInvariant(t *testing.T, b *Bitmap) { + original := b.Clone() + + // Find the range to flip (slightly larger than the bitmap extent) + var maxVal uint64 + if b.IsEmpty() { + maxVal = 1000 + } else { + maxVal = uint64(b.Maximum()) + 1000 + } + + // Flip twice + b.Flip(0, maxVal) + b.Flip(0, maxVal) + + // Should be equal to original + if !original.Equals(b) { + t.Errorf("double flip should restore original bitmap, original card=%d, result card=%d", + original.GetCardinality(), b.GetCardinality()) + } +} + +// iteratorBitsInvariant checks that creating a bitmap from iterator bits gives the same bitmap +func iteratorBitsInvariant(t *testing.T, b *Bitmap) { + original := b.Clone() + + // Create new bitmap from iterator + result := NewBitmap() + iter := original.Iterator() + for iter.HasNext() { + result.Add(iter.Next()) + } + + // Should be equal to original + if !original.Equals(result) { + t.Errorf("bitmap reconstructed from iterator should equal original, original card=%d, result card=%d", + original.GetCardinality(), result.GetCardinality()) + } +} + +// unsetIteratorBitsInvariant checks that creating a bitmap from unset iterator, then flipping, gives the same bitmap +func unsetIteratorBitsInvariant(t *testing.T, b *Bitmap) { + original := b.Clone() + + numUnset := 0x100000000 - b.GetCardinality() + if numUnset > 1000000 { + t.Skip("too many iterations") + } + + // Create bitmap from unset bits + result := NewBitmap() + iter := original.UnsetIterator(0, 0x100000000) + i := 0 + for iter.HasNext() { + i++ + result.Add(iter.Next()) + } + + // Flip the result in the same 
range + result.Flip(0, 0x100000000) + + // Should be equal to original + if !original.Equals(result) { + t.Errorf("bitmap reconstructed from unset iterator + flip should equal original, original card=%d, result card=%d", + original.GetCardinality(), result.GetCardinality()) + } +} + +// getBitmapCorpus returns a diverse set of bitmaps for property testing +func getBitmapCorpus() []bitmapGenerator { + return []bitmapGenerator{ + { + name: "empty", + gen: func() *Bitmap { + return NewBitmap() + }, + }, + { + name: "single_bit", + gen: func() *Bitmap { + b := NewBitmap() + b.Add(42) + return b + }, + }, + { + name: "sparse_small", + gen: func() *Bitmap { + b := NewBitmap() + for i := 0; i < 100; i++ { + b.Add(uint32(i * 1000)) + } + return b + }, + }, + { + name: "sparse_random", + gen: func() *Bitmap { + b := NewBitmap() + r := rand.New(rand.NewSource(12345)) + domain := 100000000 + count := 10000 + for j := 0; j < count; j++ { + v := uint32(r.Intn(domain)) + b.Add(v) + } + return b + }, + }, + { + name: "dense_small", + gen: func() *Bitmap { + b := NewBitmap() + for i := 0; i < 10000; i++ { + b.Add(uint32(i)) + } + return b + }, + }, + { + name: "dense_range", + gen: func() *Bitmap { + b := NewBitmap() + b.AddRange(0, 100000) + return b + }, + }, + { + name: "sequential_ranges", + gen: func() *Bitmap { + b := NewBitmap() + b.AddRange(0, 1000) + b.AddRange(10000, 11000) + b.AddRange(100000, 101000) + return b + }, + }, + { + name: "mixed_containers", + gen: func() *Bitmap { + b := NewBitmap() + // Sparse in first container + for i := 0; i < 100; i++ { + b.Add(uint32(i * 100)) + } + // Dense in second container + for i := 0; i < 60000; i++ { + b.Add(uint32(65536 + i)) + } + // Sparse in third container + for i := 0; i < 50; i++ { + b.Add(uint32(131072 + i*1000)) + } + return b + }, + }, + { + name: "alternating_bits", + gen: func() *Bitmap { + b := NewBitmap() + for i := 0; i < 100000; i += 2 { + b.Add(uint32(i)) + } + return b + }, + }, + { + name: "high_values", + 
// unsetIterator iterates, in ascending order, over the 32-bit values in
// [start, end) that are not present in a bitmap. It walks the key space one
// 16-bit key (chunk of 65536 values) at a time.
type unsetIterator struct {
	// containerIndex is the position in highlowcontainer of the first
	// container whose key is >= nextKey (or size() when there is none).
	containerIndex int
	// nextKey is the high 16 bits of the values currently being produced.
	nextKey int
	// hs caches nextKey<<16 while positioned on a real container.
	hs uint32
	// iter is the unset iterator of the container at nextKey. It is nil when
	// no container exists for nextKey (so every value of that chunk is
	// unset) and also once the range has been exhausted.
	iter             shortPeekable
	highlowcontainer *roaringArray

	// Value storage for the per-container iterators; iter points at one of
	// these (presumably to avoid a separate allocation per container).
	arrayUnsetIter  arrayContainerUnsetIterator
	runUnsetIter    runUnsetIterator16
	bitmapUnsetIter bitmapContainerUnsetIterator
	// emptyContainerVal is the next low 16-bit value to yield while iter is
	// nil because nextKey has no container.
	emptyContainerVal uint16

	// start and end delimit the half-open range [start, end) being iterated.
	start, end uint64
}
// HasNext returns true if there are more integers to iterate over.
//
// It is not a pure query: while looking for the next unset value it may move
// the iterator forward past keys that have no unset values left in range
// (advancing nextKey / containerIndex and re-running init).
func (iui *unsetIterator) HasNext() bool {
	// Skip containers that have no unset bits in our range
	for iui.nextKey < 65536 && uint64(iui.nextKey)<<16 < iui.end {
		if iui.iter == nil {
			// We're in an empty container gap, which has unset bits
			if uint64(iui.nextKey)<<16|uint64(iui.emptyContainerVal) < iui.end {
				return true
			}
			// Move to next container
			// NOTE(review): unlike Next, this branch also bumps
			// containerIndex although a gap does not consume a container.
			// It is only reached when the pending gap value is already
			// >= end, so after nextKey++ the loop condition fails and the
			// index is never used again — harmless, but inconsistent.
			iui.nextKey++
			iui.containerIndex++
			iui.init()
			continue
		}
		if iui.iter.hasNext() {
			// Check if next value is within range
			nextVal := (uint64(iui.nextKey) << 16) | uint64(iui.iter.peekNext())
			if nextVal < iui.end {
				return true
			}
		}
		// Current container has no more unset bits in range, move to next
		iui.nextKey++
		iui.containerIndex++
		iui.init()
	}
	return false
}
// Next returns the next integer.
//
// It does not check for exhaustion itself; callers are expected to consult
// HasNext first. After producing a value it eagerly moves to the following
// key when the current key has nothing left inside [start, end).
func (iui *unsetIterator) Next() uint32 {
	if iui.iter == nil {
		// We're in an empty container gap
		x := (uint32(iui.nextKey) << 16) | uint32(iui.emptyContainerVal)
		iui.emptyContainerVal++
		if iui.emptyContainerVal == 0 || uint64(iui.nextKey)<<16|uint64(iui.emptyContainerVal) >= iui.end {
			// Wrapped around or reached end, move to next container
			// containerIndex is left alone: a gap has no container of its
			// own, so the index already refers to the next real container.
			iui.nextKey++
			iui.init()
		}
		return x
	}
	// Combine the container-local low 16 bits with the cached high bits.
	x := uint32(iui.iter.next()) | iui.hs
	if !iui.iter.hasNext() || uint64(iui.nextKey)<<16|uint64(iui.iter.peekNext()) >= iui.end {
		// Container exhausted (or its next value is out of range): step to
		// the following key and the container after this one.
		iui.nextKey++
		iui.containerIndex++
		iui.init()
	}
	return x
}
// AdvanceIfNeeded advances as long as the next value is smaller than minval.
// It never moves the iterator backwards.
func (iui *unsetIterator) AdvanceIfNeeded(minval uint32) {
	// High 16 bits of minval: the key the iterator must reach.
	targetKey := int(minval >> 16)

	// Step key by key until the target key is reached. HasNext may itself
	// move nextKey forward, which is why it is re-evaluated each iteration.
	for iui.HasNext() && iui.nextKey < targetKey {
		iui.nextKey++
		// Find the next container that matches or exceeds nextKey
		for iui.containerIndex < iui.highlowcontainer.size() &&
			int(iui.highlowcontainer.getKeyAtIndex(iui.containerIndex)) < iui.nextKey {
			iui.containerIndex++
		}
		iui.init()
	}

	// Only when positioned exactly on the target key is there anything left
	// to skip inside the chunk; if nextKey is already beyond it, every
	// remaining value is >= minval.
	if iui.HasNext() && iui.nextKey == targetKey {
		if iui.iter != nil {
			// Real container: delegate to its iterator (lowbits presumably
			// extracts the low 16 bits of minval — defined elsewhere).
			iui.iter.advanceIfNeeded(lowbits(minval))
			if !iui.iter.hasNext() || uint64(iui.nextKey)<<16|uint64(iui.iter.peekNext()) >= iui.end {
				iui.nextKey++
				iui.containerIndex++
				iui.init()
			}
		} else {
			// Gap with no container: every value is unset, so just raise the
			// pending low value if it is below the requested one.
			lowVal := lowbits(minval)
			if iui.emptyContainerVal < lowVal {
				iui.emptyContainerVal = lowVal
			}
			if uint64(iui.nextKey)<<16|uint64(iui.emptyContainerVal) >= iui.end {
				iui.nextKey++
				iui.containerIndex++
				iui.init()
			}
		}
	}
}
to be at or beyond minval - ui.it.AdvanceIfNeeded(minval) + // Find the first container that matches or exceeds the start key + for iui.containerIndex < iui.highlowcontainer.size() && + int(iui.highlowcontainer.getKeyAtIndex(iui.containerIndex)) < iui.nextKey { + iui.containerIndex++ + } - ui.updateHasNext() + iui.init() } // String creates a string representation of the Bitmap diff --git a/roaringarray.go b/roaringarray.go index 32c8ae78..31638d4b 100644 --- a/roaringarray.go +++ b/roaringarray.go @@ -40,6 +40,7 @@ type container interface { inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx) xor(r container) container getShortIterator() shortPeekable + getUnsetIterator() shortPeekable iterate(cb func(x uint16) bool) bool getReverseIterator() shortIterable getManyIterator() manyIterable diff --git a/runcontainer.go b/runcontainer.go index 5293513c..2fcec0ab 100644 --- a/runcontainer.go +++ b/runcontainer.go @@ -1980,6 +1980,61 @@ func (rc *runContainer16) getManyIterator() manyIterable { return rc.newManyRunIterator16() } +type runUnsetIterator16 struct { + rc *runContainer16 + curIndex int + nextVal int +} + +func (rc *runContainer16) newRunUnsetIterator16() *runUnsetIterator16 { + rui := &runUnsetIterator16{rc: rc, curIndex: 0, nextVal: 0} + if len(rc.iv) > 0 && rc.iv[0].start == 0 { + rui.nextVal = int(rc.iv[0].start) + int(rc.iv[0].length) + 1 + rui.curIndex = 1 + } + return rui +} + +func (rui *runUnsetIterator16) hasNext() bool { + return rui.nextVal < 65536 +} + +func (rui *runUnsetIterator16) next() uint16 { + val := rui.nextVal + rui.nextVal++ + if rui.curIndex < len(rui.rc.iv) && uint16(rui.nextVal) == rui.rc.iv[rui.curIndex].start { + rui.nextVal = int(rui.rc.iv[rui.curIndex].start) + int(rui.rc.iv[rui.curIndex].length) + 1 + rui.curIndex++ + } + return uint16(val) +} + +func (rui *runUnsetIterator16) peekNext() uint16 { + return uint16(rui.nextVal) +} + +func (rui *runUnsetIterator16) advanceIfNeeded(minval 
uint16) { + if !rui.hasNext() || rui.peekNext() >= minval { + return + } + rui.nextVal = int(minval) + for rui.curIndex < len(rui.rc.iv) { + if rui.rc.iv[rui.curIndex].start+rui.rc.iv[rui.curIndex].length < minval { + rui.curIndex++ + } else if rui.rc.iv[rui.curIndex].start <= minval { + rui.nextVal = int(rui.rc.iv[rui.curIndex].start) + int(rui.rc.iv[rui.curIndex].length) + 1 + rui.curIndex++ + break + } else { + break + } + } +} + +func (rc *runContainer16) getUnsetIterator() shortPeekable { + return rc.newRunUnsetIterator16() +} + // add the values in the range [firstOfRange, endx). endx // is still abe to express 2^16 because it is an int not an uint16. func (rc *runContainer16) iaddRange(firstOfRange, endx int) container { From 253828875255d70d08c6761bf2bb363c56fba612 Mon Sep 17 00:00:00 2001 From: Roger Peppe Date: Mon, 10 Nov 2025 11:42:56 +0000 Subject: [PATCH 2/2] UnsetIterator: address review comments Signed-off-by: Roger Peppe --- arraycontainer.go | 50 +---------------------------------------------- roaring.go | 2 +- runcontainer.go | 2 +- shortiterator.go | 50 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 51 deletions(-) diff --git a/arraycontainer.go b/arraycontainer.go index a47c173a..a5c28717 100644 --- a/arraycontainer.go +++ b/arraycontainer.go @@ -62,56 +62,8 @@ func (ac *arrayContainer) getManyIterator() manyIterable { return &shortIterator{ac.content, 0} } -type arrayContainerUnsetIterator struct { - content []uint16 - pos int - nextVal int -} - -func (acui *arrayContainerUnsetIterator) next() uint16 { - val := acui.nextVal - acui.nextVal++ - for acui.pos < len(acui.content) && uint16(acui.nextVal) == acui.content[acui.pos] { - acui.nextVal++ - acui.pos++ - } - return uint16(val) -} - -func (acui *arrayContainerUnsetIterator) hasNext() bool { - return acui.nextVal < 65536 -} - -func (acui *arrayContainerUnsetIterator) peekNext() uint16 { - return uint16(acui.nextVal) -} - -func (acui 
*arrayContainerUnsetIterator) advanceIfNeeded(minval uint16) { - if !acui.hasNext() || acui.peekNext() >= minval { - return - } - acui.nextVal = int(minval) - acui.pos = binarySearch(acui.content, minval) - if acui.pos < 0 { - acui.pos = -acui.pos - 1 - } - for acui.pos < len(acui.content) && uint16(acui.nextVal) == acui.content[acui.pos] { - acui.nextVal++ - acui.pos++ - } -} - -func newArrayContainerUnsetIterator(a *arrayContainer) *arrayContainerUnsetIterator { - acui := &arrayContainerUnsetIterator{content: a.content, pos: 0, nextVal: 0} - for acui.pos < len(acui.content) && uint16(acui.nextVal) == acui.content[acui.pos] { - acui.nextVal++ - acui.pos++ - } - return acui -} - func (ac *arrayContainer) getUnsetIterator() shortPeekable { - return newArrayContainerUnsetIterator(ac) + return newArrayContainerUnsetIterator(ac.content) } func (ac *arrayContainer) minimum() uint16 { diff --git a/roaring.go b/roaring.go index 99cc286e..f511539a 100644 --- a/roaring.go +++ b/roaring.go @@ -812,7 +812,7 @@ func (iui *unsetIterator) init() { c := iui.highlowcontainer.getContainerAtIndex(iui.containerIndex) switch t := c.(type) { case *arrayContainer: - iui.arrayUnsetIter = *newArrayContainerUnsetIterator(t) + iui.arrayUnsetIter = *newArrayContainerUnsetIterator(t.content) iui.iter = &iui.arrayUnsetIter case *runContainer16: iui.runUnsetIter = *t.newRunUnsetIterator16() diff --git a/runcontainer.go b/runcontainer.go index 2fcec0ab..a0e71ade 100644 --- a/runcontainer.go +++ b/runcontainer.go @@ -2002,7 +2002,7 @@ func (rui *runUnsetIterator16) hasNext() bool { func (rui *runUnsetIterator16) next() uint16 { val := rui.nextVal rui.nextVal++ - if rui.curIndex < len(rui.rc.iv) && uint16(rui.nextVal) == rui.rc.iv[rui.curIndex].start { + if rui.curIndex < len(rui.rc.iv) && uint16(rui.nextVal) >= rui.rc.iv[rui.curIndex].start { rui.nextVal = int(rui.rc.iv[rui.curIndex].start) + int(rui.rc.iv[rui.curIndex].length) + 1 rui.curIndex++ } diff --git a/shortiterator.go b/shortiterator.go 
index 15b78bd0..53252580 100644 --- a/shortiterator.go +++ b/shortiterator.go @@ -50,3 +50,53 @@ func (si *reverseIterator) next() uint16 { si.loc-- return a } + +type arrayContainerUnsetIterator struct { + content []uint16 + // pos is the index of the next set bit that is >= nextVal. + // When nextVal reaches content[pos], pos is incremented. + pos int + nextVal int +} + +func (acui *arrayContainerUnsetIterator) next() uint16 { + val := acui.nextVal + acui.nextVal++ + for acui.pos < len(acui.content) && uint16(acui.nextVal) >= acui.content[acui.pos] { + acui.nextVal++ + acui.pos++ + } + return uint16(val) +} + +func (acui *arrayContainerUnsetIterator) hasNext() bool { + return acui.nextVal < 65536 +} + +func (acui *arrayContainerUnsetIterator) peekNext() uint16 { + return uint16(acui.nextVal) +} + +func (acui *arrayContainerUnsetIterator) advanceIfNeeded(minval uint16) { + if !acui.hasNext() || acui.peekNext() >= minval { + return + } + acui.nextVal = int(minval) + acui.pos = binarySearch(acui.content, minval) + if acui.pos < 0 { + acui.pos = -acui.pos - 1 + } + for acui.pos < len(acui.content) && uint16(acui.nextVal) >= acui.content[acui.pos] { + acui.nextVal++ + acui.pos++ + } +} + +func newArrayContainerUnsetIterator(content []uint16) *arrayContainerUnsetIterator { + acui := &arrayContainerUnsetIterator{content: content, pos: 0, nextVal: 0} + for acui.pos < len(acui.content) && uint16(acui.nextVal) >= acui.content[acui.pos] { + acui.nextVal++ + acui.pos++ + } + return acui +}