From fb89c43ec826c5b8f9588be1ca6cfe86a3078351 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 5 Nov 2025 13:23:13 -0500 Subject: [PATCH 1/7] fixing issue 22. --- iter.go | 13 +++++ iter_test.go | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++ roaring.go | 72 +++++++++++++++++++++++ 3 files changed, 242 insertions(+) diff --git a/iter.go b/iter.go index c121ab76..0e2d68fb 100644 --- a/iter.go +++ b/iter.go @@ -27,3 +27,16 @@ func Backward(b *Bitmap) func(func(uint32) bool) { } } } + +// Unset creates an iterator that yields values in the range [min, max] that are NOT contained in the bitmap. +// The iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). +func Unset(b *Bitmap, min, max uint32) func(func(uint32) bool) { + return func(yield func(uint32) bool) { + it := b.UnsetIterator(min, max) + for it.HasNext() { + if !yield(it.Next()) { + return + } + } + } +} diff --git a/iter_test.go b/iter_test.go index 1efd5b41..48951b42 100644 --- a/iter_test.go +++ b/iter_test.go @@ -119,3 +119,160 @@ func TestValues(t *testing.T) { assert.Equal(t, testSize, n) } + +func TestUnset(t *testing.T) { + t.Run("empty bitmap", func(t *testing.T) { + b := New() + it := Unset(b, 5, 10) + + expected := []uint32{5, 6, 7, 8, 9, 10} + actual := make([]uint32, 0) + + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + + assert.Equal(t, expected, actual) + }) + + t.Run("bitmap with some values set", func(t *testing.T) { + b := New() + b.AddInt(3) + b.AddInt(7) + b.AddInt(12) + + it := Unset(b, 5, 10) + + expected := []uint32{5, 6, 8, 9, 10} + actual := make([]uint32, 0) + + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + + assert.Equal(t, expected, actual) + }) + + t.Run("range completely outside bitmap", func(t *testing.T) { + b := New() + b.AddInt(1) + b.AddInt(2) + b.AddInt(3) + + it := Unset(b, 10, 15) + + expected := []uint32{10, 11, 12, 13, 14, 15} + actual := make([]uint32, 0) + + 
it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + + assert.Equal(t, expected, actual) + }) + + t.Run("range includes set and unset values", func(t *testing.T) { + b := New() + b.AddInt(5) + b.AddInt(8) + b.AddInt(9) + + it := Unset(b, 3, 12) + + expected := []uint32{3, 4, 6, 7, 10, 11, 12} + actual := make([]uint32, 0) + + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + + assert.Equal(t, expected, actual) + }) + + t.Run("min greater than max", func(t *testing.T) { + b := New() + it := Unset(b, 10, 5) + + count := 0 + it(func(val uint32) bool { + count++ + return true + }) + + assert.Equal(t, 0, count) + }) + + t.Run("single value range - unset", func(t *testing.T) { + b := New() + b.AddInt(5) + + it := Unset(b, 3, 3) + + expected := []uint32{3} + actual := make([]uint32, 0) + + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + + assert.Equal(t, expected, actual) + }) + + t.Run("single value range - set", func(t *testing.T) { + b := New() + b.AddInt(5) + + it := Unset(b, 5, 5) + + count := 0 + it(func(val uint32) bool { + count++ + return true + }) + + assert.Equal(t, 0, count) + }) + + t.Run("early termination", func(t *testing.T) { + b := New() + + it := Unset(b, 1, 10) + + actual := make([]uint32, 0) + it(func(val uint32) bool { + actual = append(actual, val) + return len(actual) < 3 // Stop after 3 values + }) + + expected := []uint32{1, 2, 3} + assert.Equal(t, expected, actual) + }) + + t.Run("large range with sparse bitmap", func(t *testing.T) { + b := New() + b.AddInt(100) + b.AddInt(500) + b.AddInt(1000) + + it := Unset(b, 50, 150) + + actual := make([]uint32, 0) + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + + // Should include all values from 50-150 except 100 + assert.Equal(t, 100, len(actual)) // 150-50+1 - 1 = 101 - 1 = 100 + assert.Contains(t, actual, uint32(50)) + assert.Contains(t, actual, uint32(99)) + assert.NotContains(t, 
actual, uint32(100)) + assert.Contains(t, actual, uint32(101)) + assert.Contains(t, actual, uint32(150)) + }) +} diff --git a/roaring.go b/roaring.go index 84c7f05c..9d136902 100644 --- a/roaring.go +++ b/roaring.go @@ -742,6 +742,70 @@ func (ii *manyIntIterator) Initialize(a *Bitmap) { ii.init() } +type unsetIterator struct { + min, max uint32 + current uint32 + it IntPeekable + hasNext bool +} + +// Initialize configures the unset iterator to iterate over values in [min, max] that are not in the bitmap +func (ui *unsetIterator) Initialize(b *Bitmap, min, max uint32) { + ui.min = min + ui.max = max + ui.current = min + ui.it = b.Iterator() + ui.hasNext = min <= max + + // Advance to first value >= min + ui.it.AdvanceIfNeeded(min) + ui.updateHasNext() +} + +func (ui *unsetIterator) HasNext() bool { + return ui.hasNext +} + +func (ui *unsetIterator) Next() uint32 { + if !ui.hasNext { + panic("Next() called when HasNext() returns false") + } + + result := ui.current + ui.current++ + ui.updateHasNext() + return result +} + +func (ui *unsetIterator) updateHasNext() { + for ui.current <= ui.max { + if !ui.it.HasNext() { + // No more set bits, we have values to yield + ui.hasNext = true + return + } + + nextSet := ui.it.PeekNext() + if nextSet > ui.max { + // Next set bit is beyond our range, we have values to yield + ui.hasNext = true + return + } + + if ui.current < nextSet { + // We have unset values before the next set bit + ui.hasNext = true + return + } + + // Skip the set bit + ui.it.Next() + ui.current = nextSet + 1 + } + + ui.hasNext = false +} + // String creates a string representation of the Bitmap func (rb *Bitmap) String() string { // inspired by https://github.com/fzandona/goroar/ @@ -824,6 +888,14 @@ func (rb *Bitmap) ManyIterator() ManyIntIterable { return p } +// UnsetIterator creates a new IntIterable to iterate over values in the range [min, max] that are NOT contained in the bitmap. 
+// The iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). +func (rb *Bitmap) UnsetIterator(min, max uint32) IntIterable { + p := new(unsetIterator) + p.Initialize(rb, min, max) + return p +} + // Clone creates a copy of the Bitmap func (rb *Bitmap) Clone() *Bitmap { ptr := new(Bitmap) From 0a5805fab15db7fbe8059f7e3b69f86b767ab16f Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 5 Nov 2025 13:33:06 -0500 Subject: [PATCH 2/7] Update iter_test.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- iter_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iter_test.go b/iter_test.go index 48951b42..a56f5c3a 100644 --- a/iter_test.go +++ b/iter_test.go @@ -268,7 +268,7 @@ func TestUnset(t *testing.T) { }) // Should include all values from 50-150 except 100 - assert.Equal(t, 100, len(actual)) // 150-50+1 - 1 = 101 - 1 = 100 + assert.Equal(t, 100, len(actual)) // 150-50+1-1 = 100 assert.Contains(t, actual, uint32(50)) assert.Contains(t, actual, uint32(99)) assert.NotContains(t, actual, uint32(100)) From 7db47d6eccc0b15f425218ae16fea2cfaec30049 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 5 Nov 2025 13:35:57 -0500 Subject: [PATCH 3/7] more testing and simplification. 
--- iter_test.go | 16 ++++++++++++++++ roaring.go | 2 -- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/iter_test.go b/iter_test.go index 48951b42..34e7745c 100644 --- a/iter_test.go +++ b/iter_test.go @@ -275,4 +275,20 @@ func TestUnset(t *testing.T) { assert.Contains(t, actual, uint32(101)) assert.Contains(t, actual, uint32(150)) }) + + t.Run("min is in the bitmap", func(t *testing.T) { + b := New() + b.AddInt(100) + + it := Unset(b, 100, 105) + + actual := make([]uint32, 0) + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + expected := []uint32{101, 102, 103, 104, 105} + + assert.Equal(t, expected, actual) + }) } diff --git a/roaring.go b/roaring.go index 9d136902..2bdaaee5 100644 --- a/roaring.go +++ b/roaring.go @@ -755,8 +755,6 @@ func (ui *unsetIterator) Initialize(b *Bitmap, min, max uint32) { ui.max = max ui.current = min ui.it = b.Iterator() - ui.hasNext = min <= max - // Advance to first value >= min ui.it.AdvanceIfNeeded(min) ui.updateHasNext() From 6c78d0db9eff4853725304f626d17d3d3835024e Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 5 Nov 2025 13:41:21 -0500 Subject: [PATCH 4/7] overflow safety --- iter_test.go | 16 ++++++++++++++++ roaring.go | 12 ++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/iter_test.go b/iter_test.go index 527fc7f9..1128f70e 100644 --- a/iter_test.go +++ b/iter_test.go @@ -291,4 +291,20 @@ func TestUnset(t *testing.T) { assert.Equal(t, expected, actual) }) + + t.Run("extreme max", func(t *testing.T) { + b := New() + b.AddInt(4294967295) + + it := Unset(b, 4294967294, 4294967295) + + actual := make([]uint32, 0) + it(func(val uint32) bool { + actual = append(actual, val) + return true + }) + expected := []uint32{4294967294} + + assert.Equal(t, expected, actual) + }) } diff --git a/roaring.go b/roaring.go index 2bdaaee5..b8e9c08e 100644 --- a/roaring.go +++ b/roaring.go @@ -744,7 +744,7 @@ func (ii *manyIntIterator) Initialize(a *Bitmap) { type 
unsetIterator struct { min, max uint32 - current uint32 + current uint64 // use uint64 to avoid overflow it IntPeekable hasNext bool } @@ -753,7 +753,7 @@ type unsetIterator struct { func (ui *unsetIterator) Initialize(b *Bitmap, min, max uint32) { ui.min = min ui.max = max - ui.current = min + ui.current = uint64(min) ui.it = b.Iterator() // Advance to first value >= min ui.it.AdvanceIfNeeded(min) @@ -772,11 +772,11 @@ func (ui *unsetIterator) Next() uint32 { result := ui.current ui.current++ ui.updateHasNext() - return result + return uint32(result) } func (ui *unsetIterator) updateHasNext() { - for ui.current <= ui.max { + for ui.current <= uint64(ui.max) { if !ui.it.HasNext() { // No more set bits, we have values to yield ui.hasNext = true @@ -790,7 +790,7 @@ func (ui *unsetIterator) updateHasNext() { return } - if ui.current < nextSet { + if ui.current < uint64(nextSet) { // We have unset values before the next set bit ui.hasNext = true return @@ -798,7 +798,7 @@ func (ui *unsetIterator) updateHasNext() { // Skip the set bit ui.it.Next() - ui.current = nextSet + 1 + ui.current = uint64(nextSet) + 1 } ui.hasNext = false From c1aaedee98185197038ca2fc64e94752f0768011 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 5 Nov 2025 14:07:52 -0500 Subject: [PATCH 5/7] up --- iter_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iter_test.go b/iter_test.go index 1128f70e..f6fcd8b2 100644 --- a/iter_test.go +++ b/iter_test.go @@ -294,7 +294,7 @@ func TestUnset(t *testing.T) { t.Run("extreme max", func(t *testing.T) { b := New() - b.AddInt(4294967295) + b.Add(4294967295) it := Unset(b, 4294967294, 4294967295) From 970a8cba7d743c062cb7f47545eea104704c8160 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 6 Nov 2025 11:16:50 -0500 Subject: [PATCH 6/7] making it return IntPeekable --- iter_test.go | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++ roaring.go | 33 ++++++++++- 2 files changed, 185 insertions(+), 2 deletions(-) diff 
--git a/iter_test.go b/iter_test.go index f6fcd8b2..eeffd53a 100644 --- a/iter_test.go +++ b/iter_test.go @@ -308,3 +308,157 @@ func TestUnset(t *testing.T) { assert.Equal(t, expected, actual) }) } + +func TestUnsetIteratorPeekable(t *testing.T) { + t.Run("peek next", func(t *testing.T) { + b := New() + b.AddInt(5) + b.AddInt(8) + + it := b.UnsetIterator(3, 10) + + // First value should be 3 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(3), it.PeekNext()) + assert.Equal(t, uint32(3), it.Next()) + + // Next should be 4 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(4), it.PeekNext()) + assert.Equal(t, uint32(4), it.Next()) + + // Next should be 6 (skipping 5 which is set) + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(6), it.PeekNext()) + assert.Equal(t, uint32(6), it.Next()) + + // Next should be 7 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(7), it.PeekNext()) + assert.Equal(t, uint32(7), it.Next()) + + // Next should be 9 (skipping 8 which is set) + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(9), it.PeekNext()) + assert.Equal(t, uint32(9), it.Next()) + + // Next should be 10 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(10), it.PeekNext()) + assert.Equal(t, uint32(10), it.Next()) + + // No more values + assert.False(t, it.HasNext()) + }) + + t.Run("advance if needed", func(t *testing.T) { + b := New() + b.AddInt(5) + b.AddInt(8) + b.AddInt(12) + + it := b.UnsetIterator(1, 15) + + // Skip to values >= 7 + it.AdvanceIfNeeded(7) + + // Should now be at 7 (skipping 5 which is set) + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(7), it.PeekNext()) + assert.Equal(t, uint32(7), it.Next()) + + // Next should be 9 (skipping 8 which is set) + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(9), it.PeekNext()) + assert.Equal(t, uint32(9), it.Next()) + + // Skip to values >= 11 + it.AdvanceIfNeeded(11) + + // Should now be at 11 (skipping 12 which is set) + assert.True(t, it.HasNext()) + assert.Equal(t, 
uint32(11), it.PeekNext()) + assert.Equal(t, uint32(11), it.Next()) + + // Next should be 13 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(13), it.PeekNext()) + assert.Equal(t, uint32(13), it.Next()) + + // Skip beyond range + it.AdvanceIfNeeded(20) + assert.False(t, it.HasNext()) + }) + + t.Run("advance if needed before range", func(t *testing.T) { + b := New() + b.AddInt(5) + + it := b.UnsetIterator(10, 15) + + // Try to advance to a value before our range start + it.AdvanceIfNeeded(5) + + // Should still start from 10 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(10), it.PeekNext()) + }) + + t.Run("advance if needed beyond range", func(t *testing.T) { + b := New() + b.AddInt(5) + + it := b.UnsetIterator(10, 15) + + // Advance beyond our range + it.AdvanceIfNeeded(20) + + // Should have no more values + assert.False(t, it.HasNext()) + }) + + t.Run("peek next on empty iterator", func(t *testing.T) { + b := New() + b.AddInt(5) // Set bit in middle of range + + it := b.UnsetIterator(5, 5) // Range contains only the set bit + + // Should have no values + assert.False(t, it.HasNext()) + + // PeekNext should panic when HasNext is false + assert.Panics(t, func() { + it.PeekNext() + }) + }) + + t.Run("range including max uint32 unset", func(t *testing.T) { + b := New() + b.AddInt(4294967294) // Set the value before max + + it := b.UnsetIterator(4294967294, 4294967295) + + // Should have 4294967295 (max uint32) as it's unset + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(4294967295), it.PeekNext()) + assert.Equal(t, uint32(4294967295), it.Next()) + + // No more values + assert.False(t, it.HasNext()) + }) + + t.Run("max uint32 set", func(t *testing.T) { + b := New() + b.AddInt(4294967295) // Set max uint32 + + it := b.UnsetIterator(4294967294, 4294967295) + + // Should have 4294967294 as it's unset, but not 4294967295 + assert.True(t, it.HasNext()) + assert.Equal(t, uint32(4294967294), it.PeekNext()) + assert.Equal(t, uint32(4294967294), 
it.Next()) + + // No more values + assert.False(t, it.HasNext()) + }) +} diff --git a/roaring.go b/roaring.go index b8e9c08e..f433a581 100644 --- a/roaring.go +++ b/roaring.go @@ -804,6 +804,35 @@ func (ui *unsetIterator) updateHasNext() { ui.hasNext = false } +// PeekNext returns the next value without advancing the iterator +func (ui *unsetIterator) PeekNext() uint32 { + if !ui.hasNext { + panic("PeekNext() called when HasNext() returns false") + } + return uint32(ui.current) +} + +// AdvanceIfNeeded advances the iterator so that the next value is at least minval; it never moves the iterator backwards +func (ui *unsetIterator) AdvanceIfNeeded(minval uint32) { + if uint64(minval) <= ui.current { + return // Already at or past minval (ui.current never drops below ui.min); rewinding would desynchronize ui.it + } + + if minval > ui.max { + ui.current = uint64(ui.max) + 1 // Beyond our range: park past max so a later call cannot revive the iterator + ui.hasNext = false + return + } + + // Set current to minval, but make sure we skip any set bits + ui.current = uint64(minval) + + // Advance the internal iterator to be at or beyond minval + ui.it.AdvanceIfNeeded(minval) + + ui.updateHasNext() +} + // String creates a string representation of the Bitmap func (rb *Bitmap) String() string { // inspired by https://github.com/fzandona/goroar/ @@ -886,9 +915,9 @@ func (rb *Bitmap) ManyIterator() ManyIntIterable { return p } -// UnsetIterator creates a new IntIterable to iterate over values in the range [min, max] that are NOT contained in the bitmap. +// UnsetIterator creates a new IntPeekable to iterate over values in the range [min, max] that are NOT contained in the bitmap. // The iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). 
-func (rb *Bitmap) UnsetIterator(min, max uint32) IntIterable { +func (rb *Bitmap) UnsetIterator(min, max uint32) IntPeekable { p := new(unsetIterator) p.Initialize(rb, min, max) return p From 872517eceba7fa374fcad8add6b28f8aeeee1fc4 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 6 Nov 2025 11:18:21 -0500 Subject: [PATCH 7/7] minor fix --- iter_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iter_test.go b/iter_test.go index eeffd53a..4aac2fa2 100644 --- a/iter_test.go +++ b/iter_test.go @@ -434,7 +434,7 @@ func TestUnsetIteratorPeekable(t *testing.T) { t.Run("range including max uint32 unset", func(t *testing.T) { b := New() - b.AddInt(4294967294) // Set the value before max + b.Add(4294967294) // Set the value before max it := b.UnsetIterator(4294967294, 4294967295) @@ -449,7 +449,7 @@ func TestUnsetIteratorPeekable(t *testing.T) { t.Run("max uint32 set", func(t *testing.T) { b := New() - b.AddInt(4294967295) // Set max uint32 + b.Add(4294967295) // Set max uint32 it := b.UnsetIterator(4294967294, 4294967295)