Skip to content

Commit

Permalink
WIP: Kmers.jl compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
jakobnissen committed Oct 23, 2024
1 parent 0047647 commit 700b564
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 7 deletions.
10 changes: 9 additions & 1 deletion src/alphabet.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,15 @@ end
# Convenience outer constructor: the type parameters of the error are taken
# from the type of the first argument (`A`) and the type of the offending
# value (`T`); only the value itself is stored.
function EncodeError(::A, val::T) where {A, T}
    return EncodeError{A, T}(val)
end

# Print a human-readable message for an `EncodeError`: the value that could
# not be encoded, followed by the type parameter `A` of the error.
#
# How the value is rendered depends on what it is:
#   * a non-negative integer below 0x80 is shown with `repr`, followed by the
#     ASCII character it corresponds to;
#   * strings and characters are shown with `repr` (quoted and escaped);
#   * anything else is shown with `string`.
function Base.showerror(io::IO, err::EncodeError{A}) where {A}
    val = err.val
    shown = if val isa Integer && 0 <= val < 0x80
        # The lower bound matters: `Char` of a negative integer throws, and an
        # exception raised while printing another exception hides the real
        # problem. `repr(Char(val))` supplies the surrounding quotes and
        # escapes control characters such as '\n' instead of emitting them raw.
        string(repr(val), " (Char ", repr(Char(val)), ")")
    elseif val isa Union{AbstractString, AbstractChar}
        repr(val)
    else
        string(val)
    end
    # Emit the message exactly once.
    print(io, "cannot encode ", shown, " in ", A)
    return nothing
end

"""
Expand Down
5 changes: 3 additions & 2 deletions src/biosequence/biosequence.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ function has_interface(
isempty(syms) && error("Vector syms must not be empty")
first(syms) isa eltype(T) || error("Vector is of wrong element type")
seq = T((i for i in syms))
length(seq) > 0 || return false
length(seq) == length(syms) || return false
eachindex(seq) === Base.OneTo(length(seq)) || return false
E = encoded_data_eltype(T)
e = extract_encoded_element(seq, 1)
Expand All @@ -87,13 +87,14 @@ Base.nextind(::BioSequence, i::Integer) = Int(i) + 1
# Stepping backwards from index `i` always yields the `Int` one below it.
function Base.prevind(::BioSequence, i::Integer)
    return Int(i) - 1
end

# A biosequence is one-dimensional: its size is a 1-tuple holding its length.
function Base.size(x::BioSequence)
    return (length(x),)
end

# The element type is determined by the alphabet type parameter `A`.
function Base.eltype(::Type{<:BioSequence{A}}) where {A <: Alphabet}
    return eltype(A)
end
function Base.eltype(x::BioSequence)
    return eltype(typeof(x))
end

# The alphabet of a sequence (type) is an instance of its type parameter `A`.
function Alphabet(::Type{<:BioSequence{A}}) where {A <: Alphabet}
    return A()
end
function Alphabet(x::BioSequence)
    return Alphabet(typeof(x))
end

# A sequence is empty exactly when its length is zero.
function Base.isempty(x::BioSequence)
    return iszero(length(x))
end

# An empty sequence of type `T` is constructed from an empty vector of `T`'s
# element type.
function Base.empty(::Type{T}) where {T <: BioSequence}
    return T(Vector{eltype(T)}())
end
function Base.empty(x::BioSequence)
    return empty(typeof(x))
end

# Instance-level queries forward to the sequence type / its alphabet.
function BitsPerSymbol(x::BioSequence)
    return BitsPerSymbol(Alphabet(typeof(x)))
end
function bits_per_symbol(::Type{T}) where {T <: BioSequence}
    return bits_per_symbol(Alphabet(T))
end
function bits_per_symbol(x::BioSequence)
    return bits_per_symbol(typeof(x))
end

# Hash a sequence by folding the hash of every element, in iteration order,
# into the seed `x`.
function Base.hash(s::BioSequence, x::UInt)
    h = x
    for symbol in s
        h = hash(symbol, h)
    end
    return h
end

function Base.similar(seq::BioSequence, len::Integer=length(seq))
Expand Down
29 changes: 25 additions & 4 deletions src/bit-manipulation/bit-manipulation.jl
Original file line number Diff line number Diff line change
@@ -1,16 +1,37 @@
include("bitindex.jl")

# Locally owned union of the unsigned bit-integer types accepted by the
# `reversebits` methods below.
const BitUnsigned = Union{UInt8, UInt16, UInt32, UInt64, UInt128}

# Reverse the order of the 2-bit groups in `x`.
#
# First swap the two 2-bit groups inside every 4-bit group, then let the
# 4-bits-per-symbol method reverse the order of the 4-bit groups.
@inline function reversebits(x::T, ::BitsPerSymbol{2}) where T <: BitUnsigned
    # The 128-bit pattern 0b0011... is truncated to the width of `T`.
    mask = 0x33333333333333333333333333333333 % T
    x = ((x >> 2) & mask) | ((x & mask) << 2)
    return reversebits(x, BitsPerSymbol{4}())
end

# Reverse the order of the 4-bit groups in `x`.
#
# First swap the two 4-bit groups inside every byte, then let the
# 8-bits-per-symbol method reverse the order of the bytes.
@inline function reversebits(x::T, ::BitsPerSymbol{4}) where T <: BitUnsigned
    # The 128-bit pattern 0x0F0F... is truncated to the width of `T`.
    mask = 0x0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F % T
    x = ((x >> 4) & mask) | ((x & mask) << 4)
    return reversebits(x, BitsPerSymbol{8}())
end

# With 8 bits per symbol every symbol is one byte, so reversing the symbol
# order is a plain byte swap.
@inline function reversebits(x::T, ::BitsPerSymbol{8}) where T <: BitUnsigned
    return bswap(x)
end

# A `UInt16` holds a single 16-bit group, so there is nothing to reorder.
@inline function reversebits(x::UInt16, ::BitsPerSymbol{16})
    return x
end

# Reverse the order of the 16-bit groups in a 32- or 64-bit integer.
#
# Swap the two 16-bit groups inside every 32-bit group, then hand over to the
# 32-bits-per-symbol method.
@inline function reversebits(x::T, ::BitsPerSymbol{16}) where T <: Union{UInt32, UInt64}
    # The 128-bit pattern 0x0000FFFF... is truncated to the width of `T`.
    mask = 0x0000FFFF0000FFFF0000FFFF0000FFFF % T
    moved_down = (x >> 16) & mask
    moved_up = (x & mask) << 16
    return reversebits(moved_down | moved_up, BitsPerSymbol{32}())
end

# A `UInt32` holds a single 32-bit group, so there is nothing to reorder.
@inline reversebits(x::UInt32, ::BitsPerSymbol{32}) = x

# Reverse the order of the 32-bit groups in a 64-bit integer.
#
# Swap the two 32-bit halves, then hand over to the 64-bits-per-symbol method.
@inline function reversebits(x::T, ::BitsPerSymbol{32}) where T <: Union{UInt64}
    # Full 32-hex-digit pattern, truncated to the width of `T`. The previous
    # literal was one `F` short (31 digits) and only produced the right mask
    # because the truncation to 64 bits discarded the malformed upper half.
    mask = 0x00000000FFFFFFFF00000000FFFFFFFF % T
    x = ((x >> 32) & mask) | ((x & mask) << 32)
    return reversebits(x, BitsPerSymbol{64}())
end

# NOTE(review): the former `BitsPerSymbol{8}` method constrained by
# `Base.BitUnsigned` was dropped here. It had the same signature as the
# `BitUnsigned` method defined earlier in this file and would overwrite it.

# A `UInt64` holds a single 64-bit group, so there is nothing to reorder.
@inline reversebits(x::UInt64, ::BitsPerSymbol{64}) = x

@inline function complement_bitpar(x::Unsigned, ::T) where {T<:NucleicAcidAlphabet{2}}
return ~x
Expand Down

0 comments on commit 700b564

Please sign in to comment.