Skip to content

Commit

Permalink
WIP: Kmers.jl compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
jakobnissen committed Dec 30, 2023
1 parent 3de9d43 commit fa55ab3
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 7 deletions.
10 changes: 9 additions & 1 deletion src/alphabet.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,15 @@ end
# Convenience constructor: takes the type parameters from the types of the
# two arguments; only `val` is stored, the first argument's value is unused.
function EncodeError(::A, val::T) where {A, T}
    return EncodeError{A, T}(val)
end

# Print a message saying that `err.val` cannot be encoded in alphabet type `A`.
#
# How the offending value is rendered:
# * an `Integer` in `0:127` is shown via `repr`, followed by the ASCII
#   character it corresponds to;
# * strings and characters are shown via `repr`;
# * anything else is shown via `string`.
function Base.showerror(io::IO, err::EncodeError{A}) where {A}
    val = err.val
    # The lower bound matters: `Char` of a negative integer throws, and an
    # exception raised while displaying another exception hides the real one.
    char_repr = if val isa Integer && 0 <= val < 0x80
        # `repr` of the Char escapes control characters (newline, NUL, ...)
        # instead of writing them raw into the message.
        repr(val) * " (Char " * repr(Char(val)) * ")"
    elseif val isa Union{AbstractString, AbstractChar}
        repr(val)
    else
        string(val)
    end
    print(io, "cannot encode ", char_repr, " in ", A)
    return nothing
end

"""
Expand Down
5 changes: 3 additions & 2 deletions src/biosequence/biosequence.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ function has_interface(
isempty(syms) && error("Vector syms must not be empty")
first(syms) isa eltype(T) || error("Vector is of wrong element type")
seq = T((i for i in syms))
length(seq) > 0 || return false
length(seq) == length(syms) || return false
eachindex(seq) === Base.OneTo(length(seq)) || return false
E = encoded_data_eltype(T)
e = extract_encoded_element(seq, 1)
Expand All @@ -87,13 +87,14 @@ Base.nextind(::BioSequence, i::Integer) = Int(i) + 1
# The index before `i` is `i - 1`, converted to `Int`.
Base.prevind(::BioSequence, i::Integer) = Int(i) - 1
# Size is a one-element tuple holding the length.
Base.size(x::BioSequence) = (length(x),)
# The element type comes from the alphabet type parameter `A`; the instance
# method forwards to the type method.
Base.eltype(::Type{<:BioSequence{A}}) where {A <: Alphabet} = eltype(A)
Base.eltype(x::BioSequence) = eltype(typeof(x))
# The alphabet is `A()`, i.e. the type parameter constructed with no arguments;
# the instance method forwards to the type method.
Alphabet(::Type{<:BioSequence{A}}) where {A <: Alphabet} = A()
Alphabet(x::BioSequence) = Alphabet(typeof(x))
# A sequence is empty when its length is zero.
Base.isempty(x::BioSequence) = iszero(length(x))
# An empty sequence of type `T` is constructed from an empty `Vector{eltype(T)}`.
Base.empty(::Type{T}) where {T <: BioSequence} = T(eltype(T)[])
Base.empty(x::BioSequence) = empty(typeof(x))
# Both bits-per-symbol queries are answered through the sequence's alphabet.
BitsPerSymbol(x::BioSequence) = BitsPerSymbol(Alphabet(typeof(x)))
bits_per_symbol(::Type{T}) where {T <: BioSequence} = bits_per_symbol(Alphabet(T))
bits_per_symbol(x::BioSequence) = bits_per_symbol(typeof(x))

Check warning on line 97 in src/biosequence/biosequence.jl

View check run for this annotation

Codecov / codecov/patch

src/biosequence/biosequence.jl#L96-L97

Added lines #L96 - L97 were not covered by tests
# Hash a sequence by folding every element, in iteration order, into the
# running hash, starting from the seed `x`. An empty sequence hashes to `x`.
function Base.hash(s::BioSequence, x::UInt)
    acc = x
    for sym in s
        acc = hash(sym, acc)
    end
    return acc
end

function Base.similar(seq::BioSequence, len::Integer=length(seq))
Expand Down
26 changes: 22 additions & 4 deletions src/bit-manipulation/bit-manipulation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,37 @@
include("bitindex.jl")
include("bitpar-compiler.jl")

@inline function reversebits(x::T, ::BitsPerSymbol{2}) where T <: Base.BitUnsigned
const BitUnsigned = Union{UInt8, UInt16, UInt32, UInt64, UInt128}

# Swap each pair of neighbouring 2-bit groups in `x`, then let the
# `BitsPerSymbol{4}` method continue.
@inline function reversebits(x::T, ::BitsPerSymbol{2}) where {T <: BitUnsigned}
    # 0b0011 repeated, truncated to the width of T.
    pair_mask = 0x33333333333333333333333333333333 % T
    upper = (x >> 2) & pair_mask
    lower = (x & pair_mask) << 2
    return reversebits(upper | lower, BitsPerSymbol{4}())
end

@inline function reversebits(x::T, ::BitsPerSymbol{4}) where T <: Base.BitUnsigned
# Swap the two 4-bit halves of every byte in `x`, then delegate to the
# `BitsPerSymbol{8}` method.
@inline function reversebits(x::T, ::BitsPerSymbol{4}) where T <: BitUnsigned
    # 0b00001111 repeated, truncated to the width of T.
    mask = 0x0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F % T
    x = ((x >> 4) & mask) | ((x & mask) << 4)
    # Single exit point: a leftover `return bswap(x)` ahead of this line made
    # the delegating return unreachable.
    return reversebits(x, BitsPerSymbol{8}())
end

# With 8 bits per symbol the result is `bswap(x)`.
@inline function reversebits(x::T, ::BitsPerSymbol{8}) where {T <: BitUnsigned}
    return bswap(x)
end

# With 16 bits per symbol a UInt16 is returned unchanged.
@inline function reversebits(x::UInt16, ::BitsPerSymbol{16})
    return x
end

# Swap each pair of neighbouring 16-bit groups in `x`, then let the
# `BitsPerSymbol{32}` method continue.
@inline function reversebits(x::T, ::BitsPerSymbol{16}) where {T <: Union{UInt32, UInt64}}
    # 0x0000FFFF repeated, truncated to the width of T.
    group_mask = 0x0000FFFF0000FFFF0000FFFF0000FFFF % T
    upper = (x >> 16) & group_mask
    lower = (x & group_mask) << 16
    return reversebits(upper | lower, BitsPerSymbol{32}())
end

# With 32 bits per symbol a UInt32 is returned unchanged.
@inline reversebits(x::UInt32, ::BitsPerSymbol{32}) = x

# Swap the two 32-bit halves of `x`, then delegate to the `BitsPerSymbol{64}`
# method.
@inline function reversebits(x::T, ::BitsPerSymbol{32}) where T <: Union{UInt64}
    # 0x00000000FFFFFFFF repeated, truncated to the width of T. The literal
    # must have all 32 hex digits: with one digit missing the 128-bit pattern
    # is malformed and only happens to truncate to the right UInt64 value.
    mask = 0x00000000FFFFFFFF00000000FFFFFFFF % T
    x = ((x >> 32) & mask) | ((x & mask) << 32)
    return reversebits(x, BitsPerSymbol{64}())
end

reversebits(x::T, ::BitsPerSymbol{8}) where T <: Base.BitUnsigned = bswap(x)
# With 64 bits per symbol a UInt64 is returned unchanged.
@inline function reversebits(x::UInt64, ::BitsPerSymbol{64})
    return x
end

Check warning on line 35 in src/bit-manipulation/bit-manipulation.jl

View check run for this annotation

Codecov / codecov/patch

src/bit-manipulation/bit-manipulation.jl#L35

Added line #L35 was not covered by tests

@inline function complement_bitpar(x::Unsigned, ::T) where {T<:NucleicAcidAlphabet{2}}
return ~x
Expand Down

0 comments on commit fa55ab3

Please sign in to comment.