Make EncodeError messages show the offending character, require has_interface
to preserve the number of input symbols, add bits_per_symbol methods for
BioSequence, and extend reversebits to 16-, 32- and 64-bit symbols.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch (4-space indentation assumed - confirm
against the target files). The post-image blob hashes on the `index` lines were
not recomputed after the added lines were edited.

diff --git a/src/alphabet.jl b/src/alphabet.jl
index 06b7ffa6..716a92ff 100644
--- a/src/alphabet.jl
+++ b/src/alphabet.jl
@@ -94,7 +94,17 @@ end
 EncodeError(::A, val::T) where {A,T} = EncodeError{A,T}(val)
 
 function Base.showerror(io::IO, err::EncodeError{A}) where {A}
-    print(io, "cannot encode ", err.val, " in ", A)
+    val = err.val
+    # Show ASCII-range integers next to the character they denote. The lower bound
+    # is required: Char(val) throws for negative integers, which would mask this error.
+    char_repr = if val isa Integer && 0 <= val < 0x80
+        repr(val) * " (Char " * repr(Char(val)) * ")"
+    elseif val isa Union{AbstractString, AbstractChar}
+        repr(val)
+    else
+        string(val)
+    end
+    print(io, "cannot encode " * char_repr * " in ", A)
 end
 
 """
diff --git a/src/biosequence/biosequence.jl b/src/biosequence/biosequence.jl
index f71ea95f..a5c3c2d7 100644
--- a/src/biosequence/biosequence.jl
+++ b/src/biosequence/biosequence.jl
@@ -60,7 +60,7 @@ function has_interface(
     isempty(syms) && error("Vector syms must not be empty")
     first(syms) isa eltype(T) || error("Vector is of wrong element type")
     seq = T((i for i in syms))
-    length(seq) > 0 || return false
+    length(seq) == length(syms) || return false
     eachindex(seq) === Base.OneTo(length(seq)) || return false
     E = encoded_data_eltype(T)
     e = extract_encoded_element(seq, 1)
@@ -87,13 +87,14 @@ Base.nextind(::BioSequence, i::Integer) = Int(i) + 1
 Base.prevind(::BioSequence, i::Integer) = Int(i) - 1
 Base.size(x::BioSequence) = (length(x),)
 Base.eltype(::Type{<:BioSequence{A}}) where {A <: Alphabet} = eltype(A)
-Base.eltype(x::BioSequence) = eltype(typeof(x))
 Alphabet(::Type{<:BioSequence{A}}) where {A <: Alphabet} = A()
 Alphabet(x::BioSequence) = Alphabet(typeof(x))
 Base.isempty(x::BioSequence) = iszero(length(x))
 Base.empty(::Type{T}) where {T <: BioSequence} = T(eltype(T)[])
 Base.empty(x::BioSequence) = empty(typeof(x))
 BitsPerSymbol(x::BioSequence) = BitsPerSymbol(Alphabet(typeof(x)))
+bits_per_symbol(::Type{T}) where {T <: BioSequence} = bits_per_symbol(Alphabet(T))
+bits_per_symbol(x::BioSequence) = bits_per_symbol(typeof(x))
 Base.hash(s::BioSequence, x::UInt) = foldl((a, b) -> hash(b, a), s, init=x)
 
 function Base.similar(seq::BioSequence, len::Integer=length(seq))
diff --git a/src/bit-manipulation/bit-manipulation.jl b/src/bit-manipulation/bit-manipulation.jl
index cb447287..8623c5a8 100644
--- a/src/bit-manipulation/bit-manipulation.jl
+++ b/src/bit-manipulation/bit-manipulation.jl
@@ -2,19 +2,40 @@ include("bitindex.jl")
 include("bitpar-compiler.jl")
 
 
-@inline function reversebits(x::T, ::BitsPerSymbol{2}) where T <: Base.BitUnsigned
+const BitUnsigned = Union{UInt8, UInt16, UInt32, UInt64, UInt128}
+
+# Reverse the order of the N-bit symbols packed in `x`: each method swaps adjacent
+# N-bit groups and defers to the method for 2N-bit groups; `bswap` (N = 8) or an
+# identity method (a single symbol filling `x`) ends the chain.
+@inline function reversebits(x::T, ::BitsPerSymbol{2}) where T <: BitUnsigned
     mask = 0x33333333333333333333333333333333 % T
     x = ((x >> 2) & mask) | ((x & mask) << 2)
     return reversebits(x, BitsPerSymbol{4}())
 end
 
-@inline function reversebits(x::T, ::BitsPerSymbol{4}) where T <: Base.BitUnsigned
+@inline function reversebits(x::T, ::BitsPerSymbol{4}) where T <: BitUnsigned
     mask = 0x0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F % T
     x = ((x >> 4) & mask) | ((x & mask) << 4)
-    return bswap(x)
+    return reversebits(x, BitsPerSymbol{8}())
+end
+
+@inline reversebits(x::T, ::BitsPerSymbol{8}) where T <: BitUnsigned = bswap(x)
+
+@inline reversebits(x::UInt16, ::BitsPerSymbol{16}) = x
+@inline function reversebits(x::T, ::BitsPerSymbol{16}) where T <: Union{UInt32, UInt64}
+    mask = 0x0000FFFF0000FFFF0000FFFF0000FFFF % T
+    x = ((x >> 16) & mask) | ((x & mask) << 16)
+    return reversebits(x, BitsPerSymbol{32}())
+end
+
+@inline reversebits(x::UInt32, ::BitsPerSymbol{32}) = x
+@inline function reversebits(x::T, ::BitsPerSymbol{32}) where T <: Union{UInt64}
+    mask = 0x00000000FFFFFFFF00000000FFFFFFFF % T
+    x = ((x >> 32) & mask) | ((x & mask) << 32)
+    return reversebits(x, BitsPerSymbol{64}())
 end
 
-reversebits(x::T, ::BitsPerSymbol{8}) where T <: Base.BitUnsigned = bswap(x)
+@inline reversebits(x::UInt64, ::BitsPerSymbol{64}) = x
 
 @inline function complement_bitpar(x::Unsigned, ::T) where {T<:NucleicAcidAlphabet{2}}
     return ~x