diff --git a/allocator.go b/allocator.go new file mode 100644 index 00000000..2f83fe3d --- /dev/null +++ b/allocator.go @@ -0,0 +1,9 @@ +package roaring + +// Allocator is the interface for allocating various data structures used +// in this library. Its primary purpose is to give users the ability +// to control individual allocations in a relatively non-invasive way. +type Allocator interface { + AllocateBytes(size int) []byte + AllocateUInt16s(size int) []uint16 +} diff --git a/arraycontainer.go b/arraycontainer.go index 9541fd53..f76fbffd 100644 --- a/arraycontainer.go +++ b/arraycontainer.go @@ -923,6 +923,14 @@ func newArrayContainer() *arrayContainer { return p } +func newArrayContainerFromAllocator(allocator Allocator) *arrayContainer { + p := new(arrayContainer) + if allocator != nil { + p.content = allocator.AllocateUInt16s(0)[:0] + } + return p +} + func newArrayContainerFromBitmap(bc *bitmapContainer) *arrayContainer { ac := &arrayContainer{} ac.loadData(bc) diff --git a/benchmark_test.go b/benchmark_test.go index 55df2bcf..4210e17c 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -3,16 +3,16 @@ package roaring import ( "bytes" "fmt" - "github.com/stretchr/testify/require" "math/rand" "testing" + "github.com/stretchr/testify/require" + "github.com/bits-and-blooms/bitset" ) // BENCHMARKS, to run them type "go test -bench Benchmark -run -" - // go test -bench BenchmarkIteratorAlloc -benchmem -run - func BenchmarkIteratorAlloc(b *testing.B) { bm := NewBitmap() @@ -84,7 +84,6 @@ func BenchmarkIteratorAlloc(b *testing.B) { b.Fatalf("Cardinalities don't match: %d, %d", counter, expected_cardinality) } - b.Run("many iteration with alloc", func(b *testing.B) { for n := 0; n < b.N; n++ { counter = 0 @@ -117,7 +116,6 @@ func BenchmarkIteratorAlloc(b *testing.B) { } } - // go test -bench BenchmarkOrs -benchmem -run - func BenchmarkOrs(b *testing.B) { @@ -1134,3 +1132,44 @@ func BenchmarkAndAny(b *testing.B) { runSet("small-filters", genOne(r, 
largeSize, domain), genMulti(r, filtersNum, smallSize, domain)) runSet("equal", genOne(r, defaultSize, domain), genMulti(r, filtersNum, defaultSize, domain)) } + +type benchAllocator struct { + buf []byte + uint16s []uint16 +} + +func (a benchAllocator) AllocateBytes(size int) []byte { + if size <= cap(a.buf) { + return a.buf[:size] + } + return make([]byte, size) +} + +func (a benchAllocator) AllocateUInt16s(size int) []uint16 { + if size <= cap(a.uint16s) { + return a.uint16s[:size] + } + return make([]uint16, size) +} + +func BenchmarkRepeatedSparseSerialization(b *testing.B) { + var ( + allocator = benchAllocator{ + buf: make([]byte, 4096), + uint16s: make([]uint16, 4096), + } + l = NewWithAllocator(allocator) + buf = bytes.NewBuffer(nil) + ) + for i := 0; i < b.N; i++ { + l.Clear() + for j := 0; j < 16; j++ { + l.Add(uint32(j)) + } + buf.Reset() + _, err := l.WriteTo(buf) + if err != nil { + panic(err) + } + } +} diff --git a/parallel.go b/parallel.go index 9208e3e3..bbe74817 100644 --- a/parallel.go +++ b/parallel.go @@ -167,6 +167,7 @@ func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer make([]bool, 0, expectedKeys), false, }, + nil, } for i := range keys { if containers[i] != nil { // in case a resulting container was empty, see ParAnd function @@ -440,6 +441,7 @@ func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap { keys: make([]uint16, containerCount), needCopyOnWrite: make([]bool, containerCount), }, + nil, } resultOffset := 0 diff --git a/roaring.go b/roaring.go index 7220da27..6d1bd0ff 100644 --- a/roaring.go +++ b/roaring.go @@ -18,6 +18,8 @@ import ( // Bitmap represents a compressed bitmap where you can add integers. 
type Bitmap struct { highlowcontainer roaringArray + + allocator Allocator } // ToBase64 serializes a bitmap as Base64 @@ -44,13 +46,13 @@ func (rb *Bitmap) FromBase64(str string) (int64, error) { // implementations (Java, C) and is documented here: // https://github.com/RoaringBitmap/RoaringFormatSpec func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) { - return rb.highlowcontainer.writeTo(stream) + return rb.highlowcontainer.writeTo(stream, rb.allocator) } // ToBytes returns an array of bytes corresponding to what is written // when calling WriteTo func (rb *Bitmap) ToBytes() ([]byte, error) { - return rb.highlowcontainer.toBytes() + return rb.highlowcontainer.toBytes(rb.allocator) } // Checksum computes a hash (currently FNV-1a) for a bitmap that is suitable for @@ -63,7 +65,7 @@ func (rb *Bitmap) ToBytes() ([]byte, error) { func (rb *Bitmap) Checksum() uint64 { const ( offset = 14695981039346656037 - prime = 1099511628211 + prime = 1099511628211 ) var bytes []byte @@ -180,7 +182,7 @@ func (rb *Bitmap) UnmarshalBinary(data []byte) error { // NewBitmap creates a new empty Bitmap (see also New) func NewBitmap() *Bitmap { - return &Bitmap{} + return New() } // New creates a new empty Bitmap (same as NewBitmap) @@ -188,6 +190,14 @@ func New() *Bitmap { return &Bitmap{} } +// NewWithAllocator creates a new empty Bitmap with the provided +// allocator which may be used for various allocations. +func NewWithAllocator(allocator Allocator) *Bitmap { + return &Bitmap{ + allocator: allocator, + } +} + // Clear resets the Bitmap to be logically empty, but may retain // some memory allocations that may speed up future operations func (rb *Bitmap) Clear() { @@ -276,9 +286,9 @@ type intIterator struct { // This way, instead of making up-to 64k allocations per full iteration // we get a single allocation and simply reinitialize the appropriate // iterator and point to it in the generic `iter` member on each key bound. 
- shortIter shortIterator - runIter runIterator16 - bitmapIter bitmapContainerShortIterator + shortIter shortIterator + runIter runIterator16 + bitmapIter bitmapContainerShortIterator } // HasNext returns true if there are more integers to iterate over @@ -341,7 +351,6 @@ func (ii *intIterator) AdvanceIfNeeded(minval uint32) { // IntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) type IntIterator = intIterator - // Initialize configures the existing iterator so that it can iterate through the values of // the provided bitmap. // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). @@ -357,9 +366,9 @@ type intReverseIterator struct { iter shortIterable highlowcontainer *roaringArray - shortIter reverseIterator - runIter runReverseIterator16 - bitmapIter reverseBitmapContainerShortIterator + shortIter reverseIterator + runIter runReverseIterator16 + bitmapIter reverseBitmapContainerShortIterator } // HasNext returns true if there are more integers to iterate over @@ -434,9 +443,9 @@ type manyIntIterator struct { iter manyIterable highlowcontainer *roaringArray - shortIter shortIterator - runIter runIterator16 - bitmapIter bitmapContainerManyIterator + shortIter shortIterator + runIter runIterator16 + bitmapIter bitmapContainerManyIterator } func (ii *manyIntIterator) init() { @@ -495,7 +504,6 @@ func (ii *manyIntIterator) NextMany64(hs64 uint64, buf []uint64) int { return n } - // ManyIntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) type ManyIntIterator = manyIntIterator @@ -569,7 +577,7 @@ func (rb *Bitmap) Iterate(cb func(x uint32) bool) { // Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order; // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). 
func (rb *Bitmap) Iterator() IntPeekable { - p := new(intIterator) + p := new(intIterator) p.Initialize(rb) return p } @@ -592,7 +600,7 @@ func (rb *Bitmap) ManyIterator() ManyIntIterable { // Clone creates a copy of the Bitmap func (rb *Bitmap) Clone() *Bitmap { - ptr := new(Bitmap) + ptr := New() ptr.highlowcontainer = *rb.highlowcontainer.clone() return ptr } @@ -720,7 +728,7 @@ func (rb *Bitmap) Add(x uint32) { c = ra.getWritableContainerAtIndex(i).iaddReturnMinimized(lowbits(x)) rb.highlowcontainer.setContainerAtIndex(i, c) } else { - newac := newArrayContainer() + newac := rb.getNewArrayContainer() rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x))) } } @@ -736,7 +744,7 @@ func (rb *Bitmap) addwithptr(x uint32) (int, container) { rb.highlowcontainer.setContainerAtIndex(i, c) return i, c } - newac := newArrayContainer() + newac := rb.getNewArrayContainer() c = newac.iaddReturnMinimized(lowbits(x)) rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, c) return -i - 1, c @@ -754,7 +762,7 @@ func (rb *Bitmap) CheckedAdd(x uint32) bool { rb.highlowcontainer.setContainerAtIndex(i, C) return C.getCardinality() > oldcard } - newac := newArrayContainer() + newac := rb.getNewArrayContainer() rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x))) return true @@ -1713,3 +1721,7 @@ func (rb *Bitmap) Stats() Statistics { } return stats } + +func (rb *Bitmap) getNewArrayContainer() container { + return newArrayContainerFromAllocator(rb.allocator) +} diff --git a/roaringarray.go b/roaringarray.go index eeb3d313..293de3d6 100644 --- a/roaringarray.go +++ b/roaringarray.go @@ -4,8 +4,9 @@ import ( "bytes" "encoding/binary" "fmt" - "github.com/RoaringBitmap/roaring/internal" "io" + + "github.com/RoaringBitmap/roaring/internal" ) type container interface { @@ -468,21 +469,29 @@ func (ra *roaringArray) serializedSizeInBytes() uint64 { // // spec: https://github.com/RoaringBitmap/RoaringFormatSpec // -func (ra 
*roaringArray) writeTo(w io.Writer) (n int64, err error) { - hasRun := ra.hasRunCompression() - isRunSizeInBytes := 0 - cookieSize := 8 +func (ra *roaringArray) writeTo(w io.Writer, allocator Allocator) (n int64, err error) { + var ( + hasRun = ra.hasRunCompression() + isRunSizeInBytes = 0 + cookieSize = 8 + ) if hasRun { cookieSize = 4 isRunSizeInBytes = (len(ra.keys) + 7) / 8 } - descriptiveHeaderSize := 4 * len(ra.keys) - preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize - - buf := make([]byte, preambleSize+4*len(ra.keys)) - - nw := 0 + var ( + descriptiveHeaderSize = 4 * len(ra.keys) + preambleSize = cookieSize + isRunSizeInBytes + descriptiveHeaderSize + bufSizeRequired = preambleSize + 4*len(ra.keys) + buf []byte + nw = 0 + ) + if allocator != nil { + buf = allocator.AllocateBytes(bufSizeRequired) + } else { + buf = make([]byte, bufSizeRequired) + } if hasRun { binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie)) nw += 2 @@ -547,9 +556,9 @@ func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) { // // spec: https://github.com/RoaringBitmap/RoaringFormatSpec // -func (ra *roaringArray) toBytes() ([]byte, error) { +func (ra *roaringArray) toBytes(allocator Allocator) ([]byte, error) { var buf bytes.Buffer - _, err := ra.writeTo(&buf) + _, err := ra.writeTo(&buf, allocator) return buf.Bytes(), err }