diff --git a/bitset.go b/bitset.go index 334367d..161f405 100644 --- a/bitset.go +++ b/bitset.go @@ -53,6 +53,9 @@ const wordSize = uint(64) // the wordSize of a bit set in bytes const wordBytes = wordSize / 8 +// wordMask is wordSize-1, used for bit indexing in a word +const wordMask = wordSize - 1 + // log2WordSize is lg(wordSize) const log2WordSize = uint(6) @@ -1428,3 +1431,209 @@ func (b *BitSet) ShiftRight(bits uint) { b.set[i] = 0 } } + +// OnesBetween returns the number of set bits in the range [from, to). +// The range is inclusive of 'from' and exclusive of 'to'. +// Returns 0 if from >= to. +func (b *BitSet) OnesBetween(from, to uint) uint { + panicIfNull(b) + + if from >= to { + return 0 + } + + // Calculate indices and masks for the starting and ending words + startWord := from >> log2WordSize // Divide by wordSize + endWord := to >> log2WordSize + startOffset := from & wordMask // Mod wordSize + endOffset := to & wordMask + + // Case 1: Bits lie within a single word + if startWord == endWord { + // Create mask for bits between from and to + mask := uint64((1<= startOffset + count = uint(popcount(b.set[startWord] & startMask)) + + // 2b: Count all bits in complete words between start and end + if endWord > startWord+1 { + count += uint(popcntSlice(b.set[startWord+1 : endWord])) + } + + // 2c: Count bits in last word (from start of word to endOffset) + if endOffset > 0 { + endMask := uint64(1<> log2WordSize + bitOffset := outPos & wordMask + + // Write extracted bits, handling word boundary crossing + dst.set[wordIdx] |= extracted << bitOffset + if bitOffset+bitsExtracted > wordSize { + dst.set[wordIdx+1] = extracted >> (wordSize - bitOffset) + } + + outPos += bitsExtracted + } +} + +// Deposit creates a new BitSet and deposits bits according to a mask. +// See DepositTo for details. +func (b *BitSet) Deposit(mask *BitSet) *BitSet { + dst := New(mask.length) + b.DepositTo(mask, dst) + return dst +} + +// DepositTo spreads bits from a compacted form in the BitSet into positions +// specified by mask in dst. This is the inverse operation of Extract. +// +// For example, if mask has bits set at positions 1,4,5, then DepositTo will +// take consecutive bits 0,1,2 from the source BitSet and place them into +// positions 1,4,5 in the destination BitSet. +func (b *BitSet) DepositTo(mask *BitSet, dst *BitSet) { + panicIfNull(b) + panicIfNull(mask) + panicIfNull(dst) + + if len(dst.set) == 0 || len(mask.set) == 0 || len(b.set) == 0 { + return + } + + inPos := uint(0) + length := len(mask.set) + if len(dst.set) < length { + length = len(dst.set) + } + + // Process each word + for i := 0; i < length; i++ { + if mask.set[i] == 0 { + continue // Skip words with no bits to deposit + } + + // Calculate source word index + wordIdx := inPos >> log2WordSize + if wordIdx >= uint(len(b.set)) { + break // No more source bits available + } + + // Get source bits, handling word boundary crossing + sourceBits := b.set[wordIdx] + bitOffset := inPos & wordMask + if wordIdx+1 < uint(len(b.set)) && bitOffset != 0 { + // Combine bits from current and next word + sourceBits = (sourceBits >> bitOffset) | + (b.set[wordIdx+1] << (wordSize - bitOffset)) + } else { + sourceBits >>= bitOffset + } + + // Deposit bits according to mask + dst.set[i] = (dst.set[i] &^ mask.set[i]) | pdep(sourceBits, mask.set[i]) + inPos += uint(popcount(mask.set[i])) + } +} + +//go:generate go run cmd/pextgen/main.go -pkg=bitset + +func pext(w, m uint64) (result uint64) { + var outPos uint + + // Process byte by byte + for i := 0; i < 8; i++ { + shift := i << 3 // i * 8 using bit shift + b := uint8(w >> shift) + mask := uint8(m >> shift) + + extracted := pextLUT[b][mask] + bits := popLUT[mask] + + result |= uint64(extracted) << outPos + outPos += uint(bits) + } + + return result +} + +func pdep(w, m uint64) (result uint64) { + var inPos uint + + // Process byte by byte + for i := 0; i < 8; i++ { + shift := i << 3 // i * 8 using bit shift + mask := uint8(m >> shift) + bits := popLUT[mask] + + // Get the bits we'll deposit from the source + b := uint8(w >> inPos) + + // Deposit them according to the mask for this byte + deposited := pdepLUT[b][mask] + + // Add to result + result |= uint64(deposited) << shift + inPos += uint(bits) + } + + return result +} diff --git a/bitset_test.go b/bitset_test.go index c313f41..77ff334 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -20,6 +20,7 @@ import ( "math/rand" "strconv" "testing" + "time" ) func TestStringer(t *testing.T) { @@ -2231,3 +2232,470 @@ func TestPreviousClear(t *testing.T) { }) } } + +func TestBitSetOnesBetween(t *testing.T) { + testCases := []struct { + name string + input *BitSet + from uint + to uint + expected uint + }{ + {"empty range", New(64).Set(0).Set(1), 5, 5, 0}, + {"invalid range", New(64).Set(0).Set(1), 5, 3, 0}, + {"single word", New(64).Set(1).Set(2).Set(3), 1, 3, 2}, + {"single word full", New(64).Set(0).Set(1).Set(2).Set(3), 0, 4, 4}, + {"cross word boundary", New(128).Set(63).Set(64).Set(65), 63, 66, 3}, + {"multiple words", New(256).Set(0).Set(63).Set(64).Set(127).Set(128), 0, 129, 5}, + {"large gap", New(256).Set(0).Set(100).Set(200), 0, 201, 3}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got := tc.input.OnesBetween(tc.from, tc.to) + if got != tc.expected { + t.Errorf("OnesBetween(%d, %d) = %d, want %d", + tc.from, tc.to, got, tc.expected) + } + }) + } + + // Property-based testing + const numTests = 1e5 + seed := time.Now().UnixNano() + rng := rand.New(rand.NewSource(seed)) + t.Logf("Seed: %d", seed) + + for i := 0; i < numTests; i++ { + size := uint(rng.Intn(1024) + 64) + bs := New(size) + + // Set random bits + for j := 0; j < int(size/4); j++ { + bs.Set(uint(rng.Intn(int(size)))) + } + + // Generate random range + from := uint(rng.Intn(int(size))) + to := from + uint(rng.Intn(int(size-from))) + + // Compare with naive implementation + got := bs.OnesBetween(from, to) + want := uint(0) + for j := from; j < to; j++ { + if bs.Test(j) { + want++ + } + } + + if got != want { + t.Errorf("Case %d: OnesBetween(%d, %d) = %d, want %d", + i, from, to, got, want) + } + } +} + +func BenchmarkBitSetOnesBetween(b *testing.B) { + sizes := []int{64, 256, 1024, 4096, 16384} + densities := []float64{0.1, 0.5, 0.9} // Different bit densities to test + rng := rand.New(rand.NewSource(42)) + + for _, size := range sizes { + for _, density := range densities { + // Create bitset with given density + bs := New(uint(size)) + for i := 0; i < int(float64(size)*density); i++ { + bs.Set(uint(rng.Intn(size))) + } + + // Generate random ranges + ranges := make([][2]uint, 1000) + for i := range ranges { + from := uint(rng.Intn(size)) + to := from + uint(rng.Intn(size-int(from))) + ranges[i] = [2]uint{from, to} + } + + name := fmt.Sprintf("size=%d/density=%.1f", size, density) + b.Run(name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + r := ranges[i%len(ranges)] + _ = bs.OnesBetween(r[0], r[1]) + } + }) + } + } +} + +func generatePextTestCases(n int) [][2]uint64 { + cases := make([][2]uint64, n) + for i := range cases { + cases[i][0] = rand.Uint64() + cases[i][1] = rand.Uint64() + } + return cases +} + +func BenchmarkPEXT(b *testing.B) { + // Generate test cases + testCases := generatePextTestCases(1000) + + b.ResetTimer() + + var r uint64 + for i := 0; i < b.N; i++ { + tc := testCases[i%len(testCases)] + r = pext(tc[0], tc[1]) + } + _ = r // prevent optimization +} + +func BenchmarkPDEP(b *testing.B) { + // Generate test cases + testCases := generatePextTestCases(1000) + + b.ResetTimer() + + var r uint64 + for i := 0; i < b.N; i++ { + tc := testCases[i%len(testCases)] + r = pdep(tc[0], tc[1]) + } + _ = r // prevent optimization +} + +func TestPext(t *testing.T) { + const numTests = 1e6 + seed := time.Now().UnixNano() + rng := rand.New(rand.NewSource(seed)) + t.Logf("Seed: %d", seed) + + for i := 0; i < numTests; i++ { + w := rng.Uint64() + m := rng.Uint64() + result := pext(w, m) + popCount := popcount(m) + + // Test invariants + if popCount > 0 && result >= (uint64(1)< popcount(w&m) { + t.Fatalf("Case %d: result has more 1s than masked input: result=%x, input&mask=%x", + i, result, w&m) + } + + // Test that extracted bits preserve relative ordering: + // For each bit position that's set in the mask (m): + // 1. Extract a bit from result (resultCopy&1) + // 2. Get corresponding input bit from w (w>>j&1) + // 3. XOR them - if different, bits weren't preserved correctly + resultCopy := result + for j := 0; j < 64; j++ { + // Check if mask bit is set at position j + if m&(uint64(1)<>j&1 gets bit j from original input + // XOR (^) checks if they match + if (resultCopy&1)^(w>>j&1) != 0 { + t.Fatalf("Case %d: bit ordering violation at position %d", i, j) + } + // Shift to examine next bit in packed result + resultCopy >>= 1 + } + } + } +} + +func TestPdep(t *testing.T) { + const numTests = 1e6 + seed := time.Now().UnixNano() + rng := rand.New(rand.NewSource(seed)) + t.Logf("Seed: %d", seed) + + for i := 0; i < numTests; i++ { + w := rng.Uint64() // value to deposit + m := rng.Uint64() // mask + result := pdep(w, m) + popCount := popcount(m) + + // Test invariants + if result&^m != 0 { + t.Fatalf("Case %d: result %x has bits set outside of mask %x", + i, result, m) + } + + if popcount(result) > popcount(w) { + t.Fatalf("Case %d: result has more 1s than input: result=%x, input=%x", + i, result, w) + } + + // Verify by using PEXT to extract bits back + // The composition of PEXT(PDEP(x,m),m) should equal x masked to popcount bits + extracted := pext(result, m) + maskBits := (uint64(1) << popCount) - 1 + if (extracted & maskBits) != (w & maskBits) { + t.Fatalf("Case %d: PEXT(PDEP(w,m),m) != w: got=%x, want=%x (w=%x, m=%x)", + i, extracted&maskBits, w&maskBits, w, m) + } + } +} + +func TestBitSetExtract(t *testing.T) { + // Property-based tests + const numTests = 1e4 + seed := time.Now().UnixNano() + rng := rand.New(rand.NewSource(seed)) + t.Logf("Seed: %d", seed) + + for i := 0; i < numTests; i++ { + // Create random bitsets + size := uint(rng.Intn(1024) + 64) // Random size between 64-1087 bits + src := New(size) + mask := New(size) + dst := New(size) + + // Set random bits + for j := 0; j < int(size/4); j++ { + src.Set(uint(rng.Intn(int(size)))) + mask.Set(uint(rng.Intn(int(size)))) + } + + // Extract bits + src.ExtractTo(mask, dst) + + // Test invariants + if dst.Count() > src.IntersectionCardinality(mask) { + t.Errorf("Case %d: result has more 1s than masked input", i) + } + + // Test bits are properly extracted and packed + pos := uint(0) + for j := uint(0); j < size; j++ { + if mask.Test(j) { + if src.Test(j) != dst.Test(pos) { + t.Errorf("Case %d: bit ordering violation at source position %d", i, j) + } + pos++ + } + } + } + + // Keep existing test cases + testCases := []struct { + name string + src *BitSet // source bits + mask *BitSet // mask bits + expected *BitSet // expected extracted bits + }{ + { + name: "single bit", + src: New(8).Set(1), // 0b01 + mask: New(8).Set(1), // 0b01 + expected: New(8).Set(0), // 0b1 + }, + { + name: "two sequential bits", + src: New(8).Set(0).Set(1), // 0b11 + mask: New(8).Set(0).Set(1), // 0b11 + expected: New(8).Set(0).Set(1), // 0b11 + }, + { + name: "sparse bits", + src: New(16).Set(0).Set(10), // 0b10000000001 + mask: New(16).Set(0).Set(5).Set(10), // 0b10000100001 + expected: New(8).Set(0).Set(2), // 0b101 + }, + { + name: "masked off bits", + src: New(8).Set(0).Set(1).Set(2).Set(3), // 0b1111 + mask: New(8).Set(0).Set(2), // 0b0101 + expected: New(8).Set(0).Set(1), // 0b11 + }, + { + name: "cross word boundary", + src: New(128).Set(63).Set(64).Set(65), + mask: New(128).Set(63).Set(64).Set(65), + expected: New(8).Set(0).Set(1).Set(2), + }, + { + name: "large gap", + src: New(256).Set(0).Set(100).Set(200), + mask: New(256).Set(0).Set(100).Set(200), + expected: New(8).Set(0).Set(1).Set(2), + }, + { + name: "extracting zeros", + src: New(8), // 0b00 + mask: New(8).Set(0).Set(1).Set(2), // 0b111 + expected: New(8), // 0b000 + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + dst := New(tc.expected.Len()) + tc.src.ExtractTo(tc.mask, dst) + if !dst.Equal(tc.expected) { + t.Errorf("got %v, expected %v", dst, tc.expected) + } + }) + } +} + +func TestBitSetDeposit(t *testing.T) { + // Property-based tests + const numTests = 1e4 + seed := time.Now().UnixNano() + rng := rand.New(rand.NewSource(seed)) + t.Logf("Seed: %d", seed) + + for i := 0; i < numTests; i++ { + // Create random bitsets + size := uint(rng.Intn(1024) + 64) // Random size between 64-1087 bits + src := New(size) + mask := New(size) + dst := New(size) + + // Set random bits + for j := 0; j < int(size/4); j++ { + src.Set(uint(rng.Intn(int(mask.Count() + 1)))) + mask.Set(uint(rng.Intn(int(size)))) + } + + // Deposit bits + src.DepositTo(mask, dst) + + // Test invariants + if dst.Count() > src.Count() { + t.Errorf("Case %d: result has more 1s than input", i) + } + + if (dst.Bytes()[0] &^ mask.Bytes()[0]) != 0 { + t.Errorf("Case %d: result has bits set outside of mask", i) + } + + // Extract bits back and verify + extracted := New(size) + dst.ExtractTo(mask, extracted) + maskBits := New(size) + for j := uint(0); j < mask.Count(); j++ { + maskBits.Set(j) + } + srcMasked := src.Clone() + srcMasked.InPlaceIntersection(maskBits) + if !extracted.Equal(srcMasked) { + t.Errorf("Case %d: ExtractTo(DepositTo(x,m),m) != x", i) + } + } + + // Keep existing test cases + testCases := []struct { + name string + src *BitSet // source bits (packed in low positions) + mask *BitSet // mask bits (positions to deposit into) + dst *BitSet // destination bits (initially set) + expected *BitSet // expected result + }{ + { + name: "sparse bits", + src: New(8).Set(0), // 0b01 + mask: New(8).Set(0).Set(5), // 0b100001 + expected: New(8).Set(0), // 0b000001 + }, + { + name: "masked off bits", + src: New(8).Set(0).Set(1), // 0b11 + mask: New(8).Set(0).Set(2), // 0b101 + expected: New(8).Set(0).Set(2), // 0b101 + }, + { + name: "cross word boundary", + src: New(8).Set(0).Set(1), // 0b11 + mask: New(128).Set(63).Set(64), // bits across word boundary + expected: New(128).Set(63).Set(64), // bits deposited across boundary + }, + { + name: "large gaps", + src: New(8).Set(0).Set(1), // 0b11 + mask: New(128).Set(0).Set(100), // widely spaced bits + expected: New(128).Set(0).Set(100), // deposited into sparse positions + }, + { + name: "depositing zeros", + src: New(8), // 0b00 + mask: New(8).Set(0).Set(1).Set(2), // 0b111 + expected: New(8), // 0b000 + }, + { + name: "preserve unmasked bits", + src: New(8), // empty source + mask: New(8), // empty mask + dst: New(8).Set(1).Set(2).Set(3), // dst has some bits set + expected: New(8).Set(1).Set(2).Set(3), // should remain unchanged + }, + { + name: "preserve bits outside mask within word", + src: New(8).Set(0), // source has bit 0 set + mask: New(8).Set(1), // only depositing into bit 1 + dst: New(8).Set(0).Set(2).Set(3), // dst has bits 0,2,3 set + expected: New(8).Set(0).Set(1).Set(2).Set(3), // bits 0,2,3 should remain, bit 1 should be set + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var dst *BitSet + if tc.dst == nil { + dst = New(tc.expected.Len()) + } else { + dst = tc.dst.Clone() + } + tc.src.DepositTo(tc.mask, dst) + if !dst.Equal(tc.expected) { + t.Errorf("got %v, expected %v", dst, tc.expected) + } + }) + } +} + +func BenchmarkBitSetExtractDeposit(b *testing.B) { + sizes := []int{64, 256, 1024, 4096, 16384, 2 << 15} + rng := rand.New(rand.NewSource(42)) // fixed seed for reproducibility + + for _, size := range sizes { + // Create source with random bits + src := New(uint(size)) + for i := 0; i < size/4; i++ { // Set ~25% of bits + src.Set(uint(rng.Intn(size))) + } + + // Create mask with random bits + mask := New(uint(size)) + for i := 0; i < size/4; i++ { + mask.Set(uint(rng.Intn(size))) + } + + b.Run(fmt.Sprintf("size=%d/fn=ExtractTo", size), func(b *testing.B) { + dst := New(uint(size)) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + src.ExtractTo(mask, dst) + dst.ClearAll() + } + }) + + b.Run(fmt.Sprintf("size=%d/fn=DepositTo", size), func(b *testing.B) { + dst := New(uint(size)) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + src.DepositTo(mask, dst) + dst.ClearAll() + } + }) + } +} diff --git a/cmd/pextgen/main.go b/cmd/pextgen/main.go new file mode 100644 index 0000000..312d4b0 --- /dev/null +++ b/cmd/pextgen/main.go @@ -0,0 +1,139 @@ +package main + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "math/bits" + "os" +) + +// pextByte handles single-byte PEXT operation +func pextByte(b, m uint8) uint8 { + var result, bitPos uint8 + for i := uint8(0); i < 8; i++ { + if m&(1<