Skip to content

Commit

Permalink
Merge pull request #184 from tsenart/ext-dep-ones-between
Browse files Browse the repository at this point in the history
bitset: add range counting and parallel bit extract/deposit operations
  • Loading branch information
lemire authored Dec 9, 2024
2 parents 34a1fd3 + fe3c4b9 commit 2108e73
Show file tree
Hide file tree
Showing 4 changed files with 9,702 additions and 0 deletions.
209 changes: 209 additions & 0 deletions bitset.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ const wordSize = uint(64)
// the wordSize of a bit set in bytes
const wordBytes = wordSize / 8

// wordMask is wordSize-1, used for bit indexing in a word
const wordMask = wordSize - 1

// log2WordSize is lg(wordSize)
const log2WordSize = uint(6)

Expand Down Expand Up @@ -1438,3 +1441,209 @@ func (b *BitSet) ShiftRight(bits uint) {
b.set[i] = 0
}
}

// OnesBetween returns the number of set bits in the range [from, to).
// The range is inclusive of 'from' and exclusive of 'to'.
// Returns 0 if from >= to.
func (b *BitSet) OnesBetween(from, to uint) uint {
panicIfNull(b)

if from >= to {
return 0
}

// Calculate indices and masks for the starting and ending words
startWord := from >> log2WordSize // Divide by wordSize
endWord := to >> log2WordSize
startOffset := from & wordMask // Mod wordSize
endOffset := to & wordMask

// Case 1: Bits lie within a single word
if startWord == endWord {
// Create mask for bits between from and to
mask := uint64((1<<endOffset)-1) &^ ((1 << startOffset) - 1)
return uint(popcount(b.set[startWord] & mask))

Check failure on line 1465 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.17.x, ubuntu-latest)

undefined: popcount

Check failure on line 1465 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.19.x, ubuntu-latest)

undefined: popcount

Check failure on line 1465 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, ubuntu-latest)

undefined: popcount

Check failure on line 1465 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: popcount

Check failure on line 1465 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.16.x, ubuntu-latest)

undefined: popcount

Check failure on line 1465 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, ubuntu-latest)

undefined: popcount

Check failure on line 1465 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.17.x, ubuntu-latest)

undefined: popcount

Check failure on line 1465 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.16.x, ubuntu-latest)

undefined: popcount

Check failure on line 1465 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: popcount

Check failure on line 1465 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, macos-latest)

undefined: popcount
}

var count uint

// Case 2: Bits span multiple words
// 2a: Count bits in first word (from startOffset to end of word)
startMask := ^uint64((1 << startOffset) - 1) // Mask for bits >= startOffset
count = uint(popcount(b.set[startWord] & startMask))

Check failure on line 1473 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.17.x, ubuntu-latest)

undefined: popcount

Check failure on line 1473 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.19.x, ubuntu-latest)

undefined: popcount

Check failure on line 1473 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, ubuntu-latest)

undefined: popcount

Check failure on line 1473 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: popcount

Check failure on line 1473 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.16.x, ubuntu-latest)

undefined: popcount

Check failure on line 1473 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, ubuntu-latest)

undefined: popcount

Check failure on line 1473 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.17.x, ubuntu-latest)

undefined: popcount

Check failure on line 1473 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.16.x, ubuntu-latest)

undefined: popcount

Check failure on line 1473 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: popcount

Check failure on line 1473 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, macos-latest)

undefined: popcount

// 2b: Count all bits in complete words between start and end
if endWord > startWord+1 {
count += uint(popcntSlice(b.set[startWord+1 : endWord]))
}

// 2c: Count bits in last word (from start of word to endOffset)
if endOffset > 0 {
endMask := uint64(1<<endOffset) - 1 // Mask for bits < endOffset
count += uint(popcount(b.set[endWord] & endMask))

Check failure on line 1483 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.17.x, ubuntu-latest)

undefined: popcount

Check failure on line 1483 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.19.x, ubuntu-latest)

undefined: popcount

Check failure on line 1483 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, ubuntu-latest)

undefined: popcount

Check failure on line 1483 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: popcount

Check failure on line 1483 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.16.x, ubuntu-latest)

undefined: popcount

Check failure on line 1483 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, ubuntu-latest)

undefined: popcount

Check failure on line 1483 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.17.x, ubuntu-latest)

undefined: popcount

Check failure on line 1483 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.16.x, ubuntu-latest)

undefined: popcount

Check failure on line 1483 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: popcount

Check failure on line 1483 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, macos-latest)

undefined: popcount
}

return count
}

// Extract extracts bits according to a mask and returns the result
// in a new BitSet. See ExtractTo for details.
func (b *BitSet) Extract(mask *BitSet) *BitSet {
dst := New(mask.Count())
b.ExtractTo(mask, dst)
return dst
}

// ExtractTo copies bits from the BitSet using positions specified in mask
// into a compacted form in dst. The number of set bits in mask determines
// the number of bits that will be extracted.
//
// For example, if mask has bits set at positions 1,4,5, then ExtractTo will
// take bits at those positions from the source BitSet and pack them into
// consecutive positions 0,1,2 in the destination BitSet.
func (b *BitSet) ExtractTo(mask *BitSet, dst *BitSet) {
panicIfNull(b)
panicIfNull(mask)
panicIfNull(dst)

if len(mask.set) == 0 || len(b.set) == 0 {
return
}

// Ensure destination has enough space for extracted bits
resultBits := uint(popcntSlice(mask.set))
if dst.length < resultBits {
dst.extendSet(resultBits - 1)
}

outPos := uint(0)
length := len(mask.set)
if len(b.set) < length {
length = len(b.set)
}

// Process each word
for i := 0; i < length; i++ {
if mask.set[i] == 0 {
continue // Skip words with no bits to extract
}

// Extract and compact bits according to mask
extracted := pext(b.set[i], mask.set[i])
bitsExtracted := uint(popcount(mask.set[i]))

Check failure on line 1533 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.17.x, ubuntu-latest)

undefined: popcount

Check failure on line 1533 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.19.x, ubuntu-latest)

undefined: popcount

Check failure on line 1533 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, ubuntu-latest)

undefined: popcount

Check failure on line 1533 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: popcount

Check failure on line 1533 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.16.x, ubuntu-latest)

undefined: popcount

Check failure on line 1533 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, ubuntu-latest)

undefined: popcount

Check failure on line 1533 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.17.x, ubuntu-latest)

undefined: popcount

Check failure on line 1533 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.16.x, ubuntu-latest)

undefined: popcount

Check failure on line 1533 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: popcount

Check failure on line 1533 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, macos-latest)

undefined: popcount

// Calculate destination position
wordIdx := outPos >> log2WordSize
bitOffset := outPos & wordMask

// Write extracted bits, handling word boundary crossing
dst.set[wordIdx] |= extracted << bitOffset
if bitOffset+bitsExtracted > wordSize {
dst.set[wordIdx+1] = extracted >> (wordSize - bitOffset)
}

outPos += bitsExtracted
}
}

// Deposit creates a new BitSet and deposits bits according to a mask.
// See DepositTo for details.
func (b *BitSet) Deposit(mask *BitSet) *BitSet {
dst := New(mask.length)
b.DepositTo(mask, dst)
return dst
}

// DepositTo spreads bits from a compacted form in the BitSet into positions
// specified by mask in dst. This is the inverse operation of Extract.
//
// For example, if mask has bits set at positions 1,4,5, then DepositTo will
// take consecutive bits 0,1,2 from the source BitSet and place them into
// positions 1,4,5 in the destination BitSet.
func (b *BitSet) DepositTo(mask *BitSet, dst *BitSet) {
panicIfNull(b)
panicIfNull(mask)
panicIfNull(dst)

if len(dst.set) == 0 || len(mask.set) == 0 || len(b.set) == 0 {
return
}

inPos := uint(0)
length := len(mask.set)
if len(dst.set) < length {
length = len(dst.set)
}

// Process each word
for i := 0; i < length; i++ {
if mask.set[i] == 0 {
continue // Skip words with no bits to deposit
}

// Calculate source word index
wordIdx := inPos >> log2WordSize
if wordIdx >= uint(len(b.set)) {
break // No more source bits available
}

// Get source bits, handling word boundary crossing
sourceBits := b.set[wordIdx]
bitOffset := inPos & wordMask
if wordIdx+1 < uint(len(b.set)) && bitOffset != 0 {
// Combine bits from current and next word
sourceBits = (sourceBits >> bitOffset) |
(b.set[wordIdx+1] << (wordSize - bitOffset))
} else {
sourceBits >>= bitOffset
}

// Deposit bits according to mask
dst.set[i] = (dst.set[i] &^ mask.set[i]) | pdep(sourceBits, mask.set[i])
inPos += uint(popcount(mask.set[i]))

Check failure on line 1603 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.17.x, ubuntu-latest)

undefined: popcount

Check failure on line 1603 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.19.x, ubuntu-latest)

undefined: popcount

Check failure on line 1603 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, ubuntu-latest)

undefined: popcount

Check failure on line 1603 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: popcount

Check failure on line 1603 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.16.x, ubuntu-latest)

undefined: popcount

Check failure on line 1603 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, ubuntu-latest)

undefined: popcount

Check failure on line 1603 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.17.x, ubuntu-latest)

undefined: popcount

Check failure on line 1603 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.16.x, ubuntu-latest)

undefined: popcount

Check failure on line 1603 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: popcount

Check failure on line 1603 in bitset.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, macos-latest)

undefined: popcount
}
}

//go:generate go run cmd/pextgen/main.go -pkg=bitset

func pext(w, m uint64) (result uint64) {
var outPos uint

// Process byte by byte
for i := 0; i < 8; i++ {
shift := i << 3 // i * 8 using bit shift
b := uint8(w >> shift)
mask := uint8(m >> shift)

extracted := pextLUT[b][mask]
bits := popLUT[mask]

result |= uint64(extracted) << outPos
outPos += uint(bits)
}

return result
}

func pdep(w, m uint64) (result uint64) {
var inPos uint

// Process byte by byte
for i := 0; i < 8; i++ {
shift := i << 3 // i * 8 using bit shift
mask := uint8(m >> shift)
bits := popLUT[mask]

// Get the bits we'll deposit from the source
b := uint8(w >> inPos)

// Deposit them according to the mask for this byte
deposited := pdepLUT[b][mask]

// Add to result
result |= uint64(deposited) << shift
inPos += uint(bits)
}

return result
}
Loading

0 comments on commit 2108e73

Please sign in to comment.