diff --git a/iter.go b/iter.go index c121ab76..0e2d68fb 100644 --- a/iter.go +++ b/iter.go @@ -27,3 +27,16 @@ func Backward(b *Bitmap) func(func(uint32) bool) { } } } + +// Unset creates an iterator that yields values in the range [min, max] that are NOT contained in the bitmap. +// The iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). +func Unset(b *Bitmap, min, max uint32) func(func(uint32) bool) { + return func(yield func(uint32) bool) { + it := b.UnsetIterator(min, max) + for it.HasNext() { + if !yield(it.Next()) { + return + } + } + } +} diff --git a/iter_test.go b/iter_test.go index 1efd5b41..4aac2fa2 100644 --- a/iter_test.go +++ b/iter_test.go @@ -119,3 +119,346 @@ func TestValues(t *testing.T) { assert.Equal(t, testSize, n) } + +func TestUnset(t *testing.T) { + t.Run("empty bitmap", func(t *testing.T) { + b := New() + it := Unset(b, 5, 10) + + expected := []uint32{5, 6, 7, 8, 9, 10} + actual := make([]uint32, 0) + + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + + assert.Equal(t, expected, actual) + }) + + t.Run("bitmap with some values set", func(t *testing.T) { + b := New() + b.AddInt(3) + b.AddInt(7) + b.AddInt(12) + + it := Unset(b, 5, 10) + + expected := []uint32{5, 6, 8, 9, 10} + actual := make([]uint32, 0) + + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + + assert.Equal(t, expected, actual) + }) + + t.Run("range completely outside bitmap", func(t *testing.T) { + b := New() + b.AddInt(1) + b.AddInt(2) + b.AddInt(3) + + it := Unset(b, 10, 15) + + expected := []uint32{10, 11, 12, 13, 14, 15} + actual := make([]uint32, 0) + + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + + assert.Equal(t, expected, actual) + }) + + t.Run("range includes set and unset values", func(t *testing.T) { + b := New() + b.AddInt(5) + b.AddInt(8) + b.AddInt(9) + + it := Unset(b, 3, 12) + + expected := []uint32{3, 4, 6, 7, 10, 11, 12} + actual := make([]uint32, 0) + + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + + assert.Equal(t, expected, actual) + }) + + t.Run("min greater than max", func(t *testing.T) { + b := New() + it := Unset(b, 10, 5) + + count := 0 + it(func(val uint32) bool { + count++ + return true + }) + + assert.Equal(t, 0, count) + }) + + t.Run("single value range - unset", func(t *testing.T) { + b := New() + b.AddInt(5) + + it := Unset(b, 3, 3) + + expected := []uint32{3} + actual := make([]uint32, 0) + + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + + assert.Equal(t, expected, actual) + }) + + t.Run("single value range - set", func(t *testing.T) { + b := New() + b.AddInt(5) + + it := Unset(b, 5, 5) + + count := 0 + it(func(val uint32) bool { + count++ + return true + }) + + assert.Equal(t, 0, count) + }) + + t.Run("early termination", func(t *testing.T) { + b := New() + + it := Unset(b, 1, 10) + + actual := make([]uint32, 0) + it(func(val uint32) bool { + actual = append(actual, val) + return len(actual) < 3 // Stop after 3 values + }) + + expected := []uint32{1, 2, 3} + assert.Equal(t, expected, actual) + }) + + t.Run("large range with sparse bitmap", func(t *testing.T) { + b := New() + b.AddInt(100) + b.AddInt(500) + b.AddInt(1000) + + it := Unset(b, 50, 150) + + actual := make([]uint32, 0) + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + + // Should include all values from 50-150 except 100 + assert.Equal(t, 100, len(actual)) // 150-50+1-1 = 100 + assert.Contains(t, actual, uint32(50)) + assert.Contains(t, actual, uint32(99)) + assert.NotContains(t, actual, uint32(100)) + assert.Contains(t, actual, uint32(101)) + assert.Contains(t, actual, uint32(150)) + }) + + t.Run("min is in the bitmap", func(t *testing.T) { + b := New() + b.AddInt(100) + + it := Unset(b, 100, 105) + + actual := make([]uint32, 0) + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + expected := []uint32{101, 102, 103, 104, 105} + + assert.Equal(t, expected, actual) + }) + + t.Run("extreme max", func(t *testing.T) { + b := New() + b.Add(4294967295) + + it := Unset(b, 4294967294, 4294967295) + + actual := make([]uint32, 0) + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + expected := []uint32{4294967294} + + assert.Equal(t, expected, actual) + }) +} + +func TestUnsetIteratorPeekable(t *testing.T) { + t.Run("peek next", func(t *testing.T) { + b := New() + b.AddInt(5) + b.AddInt(8) + + it := b.UnsetIterator(3, 10) + + // First value should be 3 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(3), it.PeekNext()) + assert.Equal(t, uint32(3), it.Next()) + + // Next should be 4 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(4), it.PeekNext()) + assert.Equal(t, uint32(4), it.Next()) + + // Next should be 6 (skipping 5 which is set) + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(6), it.PeekNext()) + assert.Equal(t, uint32(6), it.Next()) + + // Next should be 7 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(7), it.PeekNext()) + assert.Equal(t, uint32(7), it.Next()) + + // Next should be 9 (skipping 8 which is set) + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(9), it.PeekNext()) + assert.Equal(t, uint32(9), it.Next()) + + // Next should be 10 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(10), it.PeekNext()) + assert.Equal(t, uint32(10), it.Next()) + + // No more values + assert.False(t, it.HasNext()) + }) + + t.Run("advance if needed", func(t *testing.T) { + b := New() + b.AddInt(5) + b.AddInt(8) + b.AddInt(12) + + it := b.UnsetIterator(1, 15) + + // Skip to values >= 7 + it.AdvanceIfNeeded(7) + + // Should now be at 7 (skipping 5 which is set) + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(7), it.PeekNext()) + assert.Equal(t, uint32(7), it.Next()) + + // Next should be 9 (skipping 8 which is set) + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(9), it.PeekNext()) + assert.Equal(t, uint32(9), it.Next()) + + // Skip to values >= 11 + it.AdvanceIfNeeded(11) + + // Should now be at 11 (skipping 12 which is set) + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(11), it.PeekNext()) + assert.Equal(t, uint32(11), it.Next()) + + // Next should be 13 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(13), it.PeekNext()) + assert.Equal(t, uint32(13), it.Next()) + + // Skip beyond range + it.AdvanceIfNeeded(20) + assert.False(t, it.HasNext()) + }) + + t.Run("advance if needed before range", func(t *testing.T) { + b := New() + b.AddInt(5) + + it := b.UnsetIterator(10, 15) + + // Try to advance to a value before our range start + it.AdvanceIfNeeded(5) + + // Should still start from 10 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(10), it.PeekNext()) + }) + + t.Run("advance if needed beyond range", func(t *testing.T) { + b := New() + b.AddInt(5) + + it := b.UnsetIterator(10, 15) + + // Advance beyond our range + it.AdvanceIfNeeded(20) + + // Should have no more values + assert.False(t, it.HasNext()) + }) + + t.Run("peek next on empty iterator", func(t *testing.T) { + b := New() + b.AddInt(5) // Set bit in middle of range + + it := b.UnsetIterator(5, 5) // Range contains only the set bit + + // Should have no values + assert.False(t, it.HasNext()) + + // PeekNext should panic when HasNext is false + assert.Panics(t, func() { + it.PeekNext() + }) + }) + + t.Run("range including max uint32 unset", func(t *testing.T) { + b := New() + b.Add(4294967294) // Set the value before max + + it := b.UnsetIterator(4294967294, 4294967295) + + // Should have 4294967295 (max uint32) as it's unset + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(4294967295), it.PeekNext()) + assert.Equal(t, uint32(4294967295), it.Next()) + + // No more values + assert.False(t, it.HasNext()) + }) + + t.Run("max uint32 set", func(t *testing.T) { + b := New() + b.Add(4294967295) // Set max uint32 + + it := b.UnsetIterator(4294967294, 4294967295) + + // Should have 4294967294 as it's unset, but not 4294967295 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(4294967294), it.PeekNext()) + assert.Equal(t, uint32(4294967294), it.Next()) + + // No more values + assert.False(t, it.HasNext()) + }) +} diff --git a/roaring.go b/roaring.go index 84c7f05c..f433a581 100644 --- a/roaring.go +++ b/roaring.go @@ -742,6 +742,97 @@ func (ii *manyIntIterator) Initialize(a *Bitmap) { ii.init() } +type unsetIterator struct { + min, max uint32 + current uint64 // use uint64 to avoid overflow + it IntPeekable + hasNext bool +} + +// Initialize configures the unset iterator to iterate over values in [min, max] that are not in the bitmap +func (ui *unsetIterator) Initialize(b *Bitmap, min, max uint32) { + ui.min = min + ui.max = max + ui.current = uint64(min) + ui.it = b.Iterator() + // Advance to first value >= min + ui.it.AdvanceIfNeeded(min) + ui.updateHasNext() +} + +func (ui *unsetIterator) HasNext() bool { + return ui.hasNext +} + +func (ui *unsetIterator) Next() uint32 { + if !ui.hasNext { + panic("Next() called when HasNext() returns false") + } + + result := ui.current + ui.current++ + ui.updateHasNext() + return uint32(result) +} + +func (ui *unsetIterator) updateHasNext() { + for ui.current <= uint64(ui.max) { + if !ui.it.HasNext() { + // No more set bits, we have values to yield + ui.hasNext = true + return + } + + nextSet := ui.it.PeekNext() + if nextSet > ui.max { + // Next set bit is beyond our range, we have values to yield + ui.hasNext = true + return + } + + if ui.current < uint64(nextSet) { + // We have unset values before the next set bit + ui.hasNext = true + return + } + + // Skip the set bit + ui.it.Next() + ui.current = uint64(nextSet) + 1 + } + + ui.hasNext = false +} + +// PeekNext returns the next value without advancing the iterator +func (ui *unsetIterator) PeekNext() uint32 { + if !ui.hasNext { + panic("PeekNext() called when HasNext() returns false") + } + return uint32(ui.current) +} + +// AdvanceIfNeeded advances the iterator so that the next value is at least minval +func (ui *unsetIterator) AdvanceIfNeeded(minval uint32) { + if minval <= ui.min { + return // Already at or before the start of our range + } + + if minval > ui.max { + // Beyond our range, no more values + ui.hasNext = false + return + } + + // Set current to minval, but make sure we skip any set bits + ui.current = uint64(minval) + + // Advance the internal iterator to be at or beyond minval + ui.it.AdvanceIfNeeded(minval) + + ui.updateHasNext() +} + // String creates a string representation of the Bitmap func (rb *Bitmap) String() string { // inspired by https://github.com/fzandona/goroar/ @@ -824,6 +915,14 @@ func (rb *Bitmap) ManyIterator() ManyIntIterable { return p } +// UnsetIterator creates a new IntPeekable to iterate over values in the range [min, max] that are NOT contained in the bitmap. +// The iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). +func (rb *Bitmap) UnsetIterator(min, max uint32) IntPeekable { + p := new(unsetIterator) + p.Initialize(rb, min, max) + return p +} + // Clone creates a copy of the Bitmap func (rb *Bitmap) Clone() *Bitmap { ptr := new(Bitmap)