From 29ca5d03f88ad50463ed0b937f52b73f27fa2152 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Wed, 7 Sep 2022 21:17:53 -0500 Subject: [PATCH 1/4] draft of allocator interface --- allocator.go | 25 ++++++++++++++++++++++ arraycontainer.go | 6 ++++++ benchmark_test.go | 47 +++++++++++++++++++++++++++++++++++++---- parallel.go | 2 ++ roaring.go | 54 +++++++++++++++++++++++++++++------------------ roaringarray.go | 30 ++++++++++++++------------ 6 files changed, 127 insertions(+), 37 deletions(-) create mode 100644 allocator.go diff --git a/allocator.go b/allocator.go new file mode 100644 index 00000000..8d3fbfc4 --- /dev/null +++ b/allocator.go @@ -0,0 +1,25 @@ +package roaring + +// Allocator is the interface for allocating various datastructures used +// in this library. Its primary purpose it provides users with the ability +// to control individual allocations in a relatively non-invasive way. +type Allocator interface { + AllocateBytes(size, capacity int) []byte + AllocateUInt16s(size, capacity int) []uint16 +} + +// defaultAllocator implements Allocator by just deferring to the default +// Go allocator. +// +// This struct has non-pointer receivers so it does not require an additional +// allocation to be instantiated as part of a larger struct. +type defaultAllocator struct { +} + +func (a defaultAllocator) AllocateBytes(size, capacity int) []byte { + return make([]byte, size, capacity) +} + +func (a defaultAllocator) AllocateUInt16s(size, capacity int) []uint16 { + return make([]uint16, size, capacity) +} diff --git a/arraycontainer.go b/arraycontainer.go index 9541fd53..87870013 100644 --- a/arraycontainer.go +++ b/arraycontainer.go @@ -923,6 +923,12 @@ func newArrayContainer() *arrayContainer { return p } +func newArrayContainerFromAllocator(allocator Allocator) *arrayContainer { + p := new(arrayContainer) + p.content = allocator.AllocateUInt16s(0, arrayDefaultMaxSize)[:0] + return p +} + func newArrayContainerFromBitmap(bc *bitmapContainer) *arrayContainer { ac := &arrayContainer{} ac.loadData(bc) diff --git a/benchmark_test.go b/benchmark_test.go index 55df2bcf..dfff40a1 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -3,16 +3,16 @@ package roaring import ( "bytes" "fmt" - "github.com/stretchr/testify/require" "math/rand" "testing" + "github.com/stretchr/testify/require" + "github.com/bits-and-blooms/bitset" ) // BENCHMARKS, to run them type "go test -bench Benchmark -run -" - // go test -bench BenchmarkIteratorAlloc -benchmem -run - func BenchmarkIteratorAlloc(b *testing.B) { bm := NewBitmap() @@ -84,7 +84,6 @@ func BenchmarkIteratorAlloc(b *testing.B) { b.Fatalf("Cardinalities don't match: %d, %d", counter, expected_cardinality) } - b.Run("many iteration with alloc", func(b *testing.B) { for n := 0; n < b.N; n++ { counter = 0 @@ -117,7 +116,6 @@ func BenchmarkIteratorAlloc(b *testing.B) { } } - // go test -bench BenchmarkOrs -benchmem -run - func BenchmarkOrs(b *testing.B) { @@ -1134,3 +1132,44 @@ func BenchmarkAndAny(b *testing.B) { runSet("small-filters", genOne(r, largeSize, domain), genMulti(r, filtersNum, smallSize, domain)) runSet("equal", genOne(r, defaultSize, domain), genMulti(r, filtersNum, defaultSize, domain)) } + +type benchAllocator struct { + buf []byte + uint16s []uint16 +} + +func (a benchAllocator) AllocateBytes(size, capacity int) []byte { + if size <= cap(a.buf) && capacity <= cap(a.buf) { + return a.buf[:size:capacity] + } + return make([]byte, size, capacity) +} + +func (a benchAllocator) AllocateUInt16s(size, capacity int) []uint16 { + if size <= cap(a.uint16s) && capacity <= cap(a.uint16s) { + return a.uint16s[:size:capacity] + } + return make([]uint16, size, capacity) +} + +func BenchmarkRepeatedSparseSerialization(b *testing.B) { + var ( + allocator = benchAllocator{ + buf: make([]byte, 4096), + uint16s: make([]uint16, 4096), + } + l = NewWithAllocator(allocator) + buf = bytes.NewBuffer(nil) + ) + for i := 0; i < b.N; i++ { + l.Clear() + for j := 0; j < 16; j++ { + l.Add(uint32(j)) + } + buf.Reset() + _, err := l.WriteTo(buf) + if err != nil { + panic(err) + } + } +} diff --git a/parallel.go b/parallel.go index 9208e3e3..8b9fc1c7 100644 --- a/parallel.go +++ b/parallel.go @@ -167,6 +167,7 @@ func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer make([]bool, 0, expectedKeys), false, }, + &defaultAllocator{}, } for i := range keys { if containers[i] != nil { // in case a resulting container was empty, see ParAnd function @@ -440,6 +441,7 @@ func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap { keys: make([]uint16, containerCount), needCopyOnWrite: make([]bool, containerCount), }, + &defaultAllocator{}, } resultOffset := 0 diff --git a/roaring.go b/roaring.go index 7220da27..d0cc2402 100644 --- a/roaring.go +++ b/roaring.go @@ -18,6 +18,8 @@ import ( // Bitmap represents a compressed bitmap where you can add integers. type Bitmap struct { highlowcontainer roaringArray + + allocator Allocator } // ToBase64 serializes a bitmap as Base64 @@ -44,13 +46,13 @@ func (rb *Bitmap) FromBase64(str string) (int64, error) { // implementations (Java, C) and is documented here: // https://github.com/RoaringBitmap/RoaringFormatSpec func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) { - return rb.highlowcontainer.writeTo(stream) + return rb.highlowcontainer.writeTo(stream, rb.allocator) } // ToBytes returns an array of bytes corresponding to what is written // when calling WriteTo func (rb *Bitmap) ToBytes() ([]byte, error) { - return rb.highlowcontainer.toBytes() + return rb.highlowcontainer.toBytes(rb.allocator) } // Checksum computes a hash (currently FNV-1a) for a bitmap that is suitable for @@ -63,7 +65,7 @@ func (rb *Bitmap) ToBytes() ([]byte, error) { func (rb *Bitmap) Checksum() uint64 { const ( offset = 14695981039346656037 - prime = 1099511628211 + prime = 1099511628211 ) var bytes []byte @@ -180,12 +182,22 @@ func (rb *Bitmap) UnmarshalBinary(data []byte) error { // NewBitmap creates a new empty Bitmap (see also New) func NewBitmap() *Bitmap { - return &Bitmap{} + return New() } // New creates a new empty Bitmap (same as NewBitmap) func New() *Bitmap { - return &Bitmap{} + return &Bitmap{ + allocator: defaultAllocator{}, + } +} + +// NewWithAllocator creates a new empty Bitmap with the provided +// allocator which may be used for various allocations. +func NewWithAllocator(allocator Allocator) *Bitmap { + return &Bitmap{ + allocator: allocator, + } } // Clear resets the Bitmap to be logically empty, but may retain @@ -276,9 +288,9 @@ type intIterator struct { // This way, instead of making up-to 64k allocations per full iteration // we get a single allocation and simply reinitialize the appropriate // iterator and point to it in the generic `iter` member on each key bound. - shortIter shortIterator - runIter runIterator16 - bitmapIter bitmapContainerShortIterator + shortIter shortIterator + runIter runIterator16 + bitmapIter bitmapContainerShortIterator } // HasNext returns true if there are more integers to iterate over @@ -341,7 +353,6 @@ func (ii *intIterator) AdvanceIfNeeded(minval uint32) { // IntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) type IntIterator = intIterator - // Initialize configures the existing iterator so that it can iterate through the values of // the provided bitmap. // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). @@ -357,9 +368,9 @@ type intReverseIterator struct { iter shortIterable highlowcontainer *roaringArray - shortIter reverseIterator - runIter runReverseIterator16 - bitmapIter reverseBitmapContainerShortIterator + shortIter reverseIterator + runIter runReverseIterator16 + bitmapIter reverseBitmapContainerShortIterator } // HasNext returns true if there are more integers to iterate over @@ -434,9 +445,9 @@ type manyIntIterator struct { iter manyIterable highlowcontainer *roaringArray - shortIter shortIterator - runIter runIterator16 - bitmapIter bitmapContainerManyIterator + shortIter shortIterator + runIter runIterator16 + bitmapIter bitmapContainerManyIterator } func (ii *manyIntIterator) init() { @@ -495,7 +506,6 @@ func (ii *manyIntIterator) NextMany64(hs64 uint64, buf []uint64) int { return n } - // ManyIntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) type ManyIntIterator = manyIntIterator @@ -569,7 +579,7 @@ func (rb *Bitmap) Iterate(cb func(x uint32) bool) { // Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order; // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). func (rb *Bitmap) Iterator() IntPeekable { - p := new(intIterator) + p := new(intIterator) p.Initialize(rb) return p } @@ -720,7 +730,7 @@ func (rb *Bitmap) Add(x uint32) { c = ra.getWritableContainerAtIndex(i).iaddReturnMinimized(lowbits(x)) rb.highlowcontainer.setContainerAtIndex(i, c) } else { - newac := newArrayContainer() + newac := rb.getNewArrayContainer() rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x))) } } @@ -736,7 +746,7 @@ func (rb *Bitmap) addwithptr(x uint32) (int, container) { rb.highlowcontainer.setContainerAtIndex(i, c) return i, c } - newac := newArrayContainer() + newac := rb.getNewArrayContainer() c = newac.iaddReturnMinimized(lowbits(x)) rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, c) return -i - 1, c @@ -754,7 +764,7 @@ func (rb *Bitmap) CheckedAdd(x uint32) bool { rb.highlowcontainer.setContainerAtIndex(i, C) return C.getCardinality() > oldcard } - newac := newArrayContainer() + newac := rb.getNewArrayContainer() rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x))) return true @@ -1713,3 +1723,7 @@ func (rb *Bitmap) Stats() Statistics { } return stats } + +func (rb *Bitmap) getNewArrayContainer() container { + return newArrayContainerFromAllocator(rb.allocator) +} diff --git a/roaringarray.go b/roaringarray.go index eeb3d313..a5a7ca79 100644 --- a/roaringarray.go +++ b/roaringarray.go @@ -4,8 +4,9 @@ import ( "bytes" "encoding/binary" "fmt" - "github.com/RoaringBitmap/roaring/internal" "io" + + "github.com/RoaringBitmap/roaring/internal" ) type container interface { @@ -468,21 +469,24 @@ func (ra *roaringArray) serializedSizeInBytes() uint64 { // // spec: https://github.com/RoaringBitmap/RoaringFormatSpec // -func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) { - hasRun := ra.hasRunCompression() - isRunSizeInBytes := 0 - cookieSize := 8 +func (ra *roaringArray) writeTo(w io.Writer, allocator Allocator) (n int64, err error) { + var ( + hasRun = ra.hasRunCompression() + isRunSizeInBytes = 0 + cookieSize = 8 + ) if hasRun { cookieSize = 4 isRunSizeInBytes = (len(ra.keys) + 7) / 8 } - descriptiveHeaderSize := 4 * len(ra.keys) - preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize - - buf := make([]byte, preambleSize+4*len(ra.keys)) - - nw := 0 + var ( + descriptiveHeaderSize = 4 * len(ra.keys) + preambleSize = cookieSize + isRunSizeInBytes + descriptiveHeaderSize + bufSizeRequired = preambleSize + 4*len(ra.keys) + buf = allocator.AllocateBytes(bufSizeRequired, bufSizeRequired) + nw = 0 + ) if hasRun { binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie)) nw += 2 @@ -547,9 +551,9 @@ func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) { // // spec: https://github.com/RoaringBitmap/RoaringFormatSpec // -func (ra *roaringArray) toBytes() ([]byte, error) { +func (ra *roaringArray) toBytes(allocator Allocator) ([]byte, error) { var buf bytes.Buffer - _, err := ra.writeTo(&buf) + _, err := ra.writeTo(&buf, allocator) return buf.Bytes(), err } From 3ab317cec6336d5c3b46c7c632453d1e53640e2f Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Wed, 7 Sep 2022 21:41:43 -0500 Subject: [PATCH 2/4] make it work with nil --- allocator.go | 16 ---------------- arraycontainer.go | 4 +++- parallel.go | 4 ++-- roaring.go | 6 ++---- roaringarray.go | 7 ++++++- 5 files changed, 13 insertions(+), 24 deletions(-) diff --git a/allocator.go b/allocator.go index 8d3fbfc4..4c2bef76 100644 --- a/allocator.go +++ b/allocator.go @@ -7,19 +7,3 @@ type Allocator interface { AllocateBytes(size, capacity int) []byte AllocateUInt16s(size, capacity int) []uint16 } - -// defaultAllocator implements Allocator by just deferring to the default -// Go allocator. -// -// This struct has non-pointer receivers so it does not require an additional -// allocation to be instantiated as part of a larger struct. -type defaultAllocator struct { -} - -func (a defaultAllocator) AllocateBytes(size, capacity int) []byte { - return make([]byte, size, capacity) -} - -func (a defaultAllocator) AllocateUInt16s(size, capacity int) []uint16 { - return make([]uint16, size, capacity) -} diff --git a/arraycontainer.go b/arraycontainer.go index 87870013..b78399fe 100644 --- a/arraycontainer.go +++ b/arraycontainer.go @@ -925,7 +925,9 @@ func newArrayContainer() *arrayContainer { func newArrayContainerFromAllocator(allocator Allocator) *arrayContainer { p := new(arrayContainer) - p.content = allocator.AllocateUInt16s(0, arrayDefaultMaxSize)[:0] + if allocator != nil { + p.content = allocator.AllocateUInt16s(0, arrayDefaultMaxSize)[:0] + } return p } diff --git a/parallel.go b/parallel.go index 8b9fc1c7..bbe74817 100644 --- a/parallel.go +++ b/parallel.go @@ -167,7 +167,7 @@ func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer make([]bool, 0, expectedKeys), false, }, - &defaultAllocator{}, + nil, } for i := range keys { if containers[i] != nil { // in case a resulting container was empty, see ParAnd function @@ -441,7 +441,7 @@ func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap { keys: make([]uint16, containerCount), needCopyOnWrite: make([]bool, containerCount), }, - &defaultAllocator{}, + nil, } resultOffset := 0 diff --git a/roaring.go b/roaring.go index d0cc2402..6d1bd0ff 100644 --- a/roaring.go +++ b/roaring.go @@ -187,9 +187,7 @@ func NewBitmap() *Bitmap { // New creates a new empty Bitmap (same as NewBitmap) func New() *Bitmap { - return &Bitmap{ - allocator: defaultAllocator{}, - } + return &Bitmap{} } // NewWithAllocator creates a new empty Bitmap with the provided @@ -602,7 +600,7 @@ func (rb *Bitmap) ManyIterator() ManyIntIterable { // Clone creates a copy of the Bitmap func (rb *Bitmap) Clone() *Bitmap { - ptr := new(Bitmap) + ptr := New() ptr.highlowcontainer = *rb.highlowcontainer.clone() return ptr } diff --git a/roaringarray.go b/roaringarray.go index a5a7ca79..f1e7d54a 100644 --- a/roaringarray.go +++ b/roaringarray.go @@ -484,9 +484,14 @@ func (ra *roaringArray) writeTo(w io.Writer, allocator Allocator) (n int64, err descriptiveHeaderSize = 4 * len(ra.keys) preambleSize = cookieSize + isRunSizeInBytes + descriptiveHeaderSize bufSizeRequired = preambleSize + 4*len(ra.keys) - buf = allocator.AllocateBytes(bufSizeRequired, bufSizeRequired) + buf []byte nw = 0 ) + if allocator != nil { + buf = allocator.AllocateBytes(bufSizeRequired, bufSizeRequired) + } else { + buf = make([]byte, bufSizeRequired) + } if hasRun { binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie)) nw += 2 From e9ca7816ea8a0f7a1d22a1098d49d377b0ba7e11 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Wed, 7 Sep 2022 21:46:34 -0500 Subject: [PATCH 3/4] pass 0 instead of sparseArrayMax --- benchmark_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark_test.go b/benchmark_test.go index dfff40a1..2de43b84 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -1140,14 +1140,14 @@ type benchAllocator struct { func (a benchAllocator) AllocateBytes(size, capacity int) []byte { if size <= cap(a.buf) && capacity <= cap(a.buf) { - return a.buf[:size:capacity] + return a.buf[:size] } return make([]byte, size, capacity) } func (a benchAllocator) AllocateUInt16s(size, capacity int) []uint16 { if size <= cap(a.uint16s) && capacity <= cap(a.uint16s) { - return a.uint16s[:size:capacity] + return a.uint16s[:size] } return make([]uint16, size, capacity) } From 9a66ae49e469bfcdcecd19fd2328d2e2352da500 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Wed, 7 Sep 2022 21:49:34 -0500 Subject: [PATCH 4/4] simplify interface --- allocator.go | 4 ++-- arraycontainer.go | 2 +- benchmark_test.go | 12 ++++++------ roaringarray.go | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/allocator.go b/allocator.go index 4c2bef76..2f83fe3d 100644 --- a/allocator.go +++ b/allocator.go @@ -4,6 +4,6 @@ package roaring // in this library. Its primary purpose it provides users with the ability // to control individual allocations in a relatively non-invasive way. type Allocator interface { - AllocateBytes(size, capacity int) []byte - AllocateUInt16s(size, capacity int) []uint16 + AllocateBytes(size int) []byte + AllocateUInt16s(size int) []uint16 } diff --git a/arraycontainer.go b/arraycontainer.go index b78399fe..f76fbffd 100644 --- a/arraycontainer.go +++ b/arraycontainer.go @@ -926,7 +926,7 @@ func newArrayContainer() *arrayContainer { func newArrayContainerFromAllocator(allocator Allocator) *arrayContainer { p := new(arrayContainer) if allocator != nil { - p.content = allocator.AllocateUInt16s(0, arrayDefaultMaxSize)[:0] + p.content = allocator.AllocateUInt16s(0)[:0] } return p } diff --git a/benchmark_test.go b/benchmark_test.go index 2de43b84..4210e17c 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -1138,18 +1138,18 @@ type benchAllocator struct { uint16s []uint16 } -func (a benchAllocator) AllocateBytes(size, capacity int) []byte { - if size <= cap(a.buf) && capacity <= cap(a.buf) { +func (a benchAllocator) AllocateBytes(size int) []byte { + if size <= cap(a.buf) { return a.buf[:size] } - return make([]byte, size, capacity) + return make([]byte, size) } -func (a benchAllocator) AllocateUInt16s(size, capacity int) []uint16 { - if size <= cap(a.uint16s) && capacity <= cap(a.uint16s) { +func (a benchAllocator) AllocateUInt16s(size int) []uint16 { + if size <= cap(a.uint16s) { return a.uint16s[:size] } - return make([]uint16, size, capacity) + return make([]uint16, size) } func BenchmarkRepeatedSparseSerialization(b *testing.B) { diff --git a/roaringarray.go b/roaringarray.go index f1e7d54a..293de3d6 100644 --- a/roaringarray.go +++ b/roaringarray.go @@ -488,7 +488,7 @@ func (ra *roaringArray) writeTo(w io.Writer, allocator Allocator) (n int64, err nw = 0 ) if allocator != nil { - buf = allocator.AllocateBytes(bufSizeRequired, bufSizeRequired) + buf = allocator.AllocateBytes(bufSizeRequired) } else { buf = make([]byte, bufSizeRequired) }