From 50cb02efdeabbec4696c424a9bd4e1a3b8e26382 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Thu, 16 Jun 2022 16:27:55 +0200 Subject: [PATCH] Invert parsing and construct from string --- src/BioSequences.jl | 3 +++ src/longsequences/constructors.jl | 33 +++++++++++++++++-------------- src/longsequences/copying.jl | 2 -- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/BioSequences.jl b/src/BioSequences.jl index aef223a9..f4e3c1fe 100644 --- a/src/BioSequences.jl +++ b/src/BioSequences.jl @@ -204,6 +204,9 @@ using Random BioSymbols.gap(::Type{Char}) = '-' +const SeqLike = Union{AbstractVector, AbstractString} +const ASCIILike = Union{String, SubString{String}} + include("alphabet.jl") # Load the bit-twiddling internals that optimised BioSequences methods depend on. diff --git a/src/longsequences/constructors.jl b/src/longsequences/constructors.jl index abdd2f94..2a69327e 100644 --- a/src/longsequences/constructors.jl +++ b/src/longsequences/constructors.jl @@ -60,20 +60,8 @@ function (::Type{T})(seq::LongSequence{<:NucleicAcidAlphabet{N}}) where end # Constructors from strings -function LongSequence{A}(s::Union{String, SubString{String}}) where {A<:Alphabet} - return LongSequence{A}(s, codetype(A())) -end - -# Generic method for String/Substring. -function LongSequence{A}(s::Union{String, SubString{String}}, ::AlphabetCode) where {A<:Alphabet} - len = length(s) - seq = LongSequence{A}(undef, len) - return copyto!(seq, 1, s, 1, len) -end - -function LongSequence{A}(s::Union{String, SubString{String}}, ::AsciiAlphabet) where {A<:Alphabet} - seq = LongSequence{A}(undef, ncodeunits(s)) - return encode_chunks!(seq, 1, codeunits(s), 1, ncodeunits(s)) +function LongSequence{A}(s::AbstractString) where {A <: Alphabet} + return parse(LongSequence{A}, s) end function LongSequence{A}( @@ -85,4 +73,19 @@ function LongSequence{A}( return copyto!(seq, 1, src, first(part), len) end -Base.parse(::Type{LongSequence{A}}, seq::AbstractString) where A = LongSequence{A}(seq) \ No newline at end of file +Base.parse(::Type{T}, s::AbstractString) where {T <: LongSequence} = parse(T, String(s)) + +function Base.parse(::Type{LongSequence{A}}, seq::ASCIILike) where {A<:Alphabet} + _parse(LongSequence{A}, seq, codetype(A())) +end + +function _parse(::Type{LongSequence{A}}, s::ASCIILike, ::AlphabetCode) where {A<:Alphabet} + len = length(s) + seq = LongSequence{A}(undef, len) + return copyto!(seq, 1, s, 1, len) +end + +function _parse(::Type{LongSequence{A}}, s::ASCIILike, ::AsciiAlphabet) where {A<:Alphabet} + seq = LongSequence{A}(undef, ncodeunits(s)) + return encode_chunks!(seq, 1, codeunits(s), 1, ncodeunits(s)) +end diff --git a/src/longsequences/copying.jl b/src/longsequences/copying.jl index f939a811..1a37304e 100644 --- a/src/longsequences/copying.jl +++ b/src/longsequences/copying.jl @@ -107,8 +107,6 @@ function _copyto!(dst::SeqOrView{A}, doff::Integer, end ######### -const SeqLike = Union{AbstractVector, AbstractString} -const ASCIILike = Union{String, SubString{String}} """ copy!(dst::LongSequence, src)