From c250148cc0a5c8fda8e7d2b68337561f23c47dd7 Mon Sep 17 00:00:00 2001 From: Gregor Kappler Date: Mon, 30 Aug 2021 21:08:37 +0200 Subject: [PATCH] _iterate -> iterate_state _iterate -> iterate_state --- src/CombinedParsers.jl | 134 ++++++++++++++++++++-------------------- src/assertions.jl | 19 +++--- src/caseless.jl | 4 +- src/constant.jl | 12 ++-- src/deepmap.jl | 2 +- src/indexed_captures.jl | 8 +-- src/match.jl | 18 +++--- src/memoize.jl | 14 ++--- src/pcre.jl | 10 +-- src/re.jl | 42 ++++++------- src/reverse.jl | 10 +-- src/state.jl | 4 +- src/textparse.jl | 10 +-- src/trie.jl | 12 ++-- src/valuematcher.jl | 4 +- test/test-parser.jl | 2 +- 16 files changed, 147 insertions(+), 158 deletions(-) diff --git a/src/CombinedParsers.jl b/src/CombinedParsers.jl index 89323e2..ef759cb 100644 --- a/src/CombinedParsers.jl +++ b/src/CombinedParsers.jl @@ -1,6 +1,6 @@ # TODO: # - remove after from get (nextind with state and i) -# - (Feedback appreciated: Would is be more efficient change the `_iterate` internal API for the first match to arity 4?) +# - (Feedback appreciated: Would is be more efficient change the `iterate_state` internal API for the first match to arity 4?) # - Base.get(parser, sequence, till, after, i, state) to # Base.get(parser, sequence, i, after, till, state) """ @@ -32,7 +32,7 @@ import AbstractTrees: print_tree, printnode export CombinedParser export result_type -"Julia types that provide CombinedParser methods result_type, state_type, _iterate, get, nextind, prevind." +"Julia types that provide CombinedParser methods result_type, state_type, iterate_state, get, nextind, prevind." 
## Pair{<:Union{AbstractToken, AbstractString, Char, Regex, Pair},<:Any} } export parser import Base: convert @@ -110,10 +110,10 @@ print_constructor(io::IO,x) = end -export _iterate +export iterate_state """ - _iterate(parser, sequence, till::Int, posi::Int[, next_i[, state=nothing]]) + iterate_state(parser, sequence, till::Int, posi::Int[, next_i=posi[, state=nothing]]) Return position `after` next match of `parser` in `sequence` at `posi`. The next match is following current match `state` (first match iif `state==nothing`). @@ -127,17 +127,15 @@ If no next match is found, return `nothing`. - `rightof(sequence,posi,parser,state)==next_i`, the position after the `state`-matching subsequence. - `sequence[leftof(sequence,next_i,parser,state):_prevind(sequence,next_i)]` is the matched subsequence. -Dispatches to `_iterate(parser, sequence,till,posi,posi,nothing)` to . - !!! note - custom `_iterate` implementations *must* return + Custom `iterate_state` implementations *must* return - `nothing` if no match is found - `Tuple{Int64,state_type(parser)}` with next position, match state if a match is found. 
""" -@inline _iterate(parser::CombinedParser, sequence, till::Int, posi::Int) = - _iterate(parser, sequence,till,posi,posi,nothing) -@deprecate _iterate(parser::CombinedParser, sequence, till::Int, posi::Int, ::Nothing) _iterate(parser, sequence,till,posi,posi,nothing) +@inline iterate_state(parser::CombinedParser, sequence, till::Int, posi::Int) = + iterate_state(parser, sequence,till,posi,posi,nothing) +@deprecate iterate_state(parser::CombinedParser, sequence, till::Int, posi::Int, ::Nothing) iterate_state(parser, sequence,till,posi,posi,nothing) """ @@ -173,8 +171,8 @@ Convienience function for overriding [`rightof`](@ref) that guarantees that not @inline _leftof(str,i,parser::WrappedParser,x::NCodeunitsState) = i-x.nc @inline _rightof(str,i,parser::WrappedParser,x::NCodeunitsState) = i+x.nc -@inline _iterate(parser::WrappedParser, sequence, till, posi, after, state) = - _iterate(parser.parser, sequence, till, posi, after, state) +@inline iterate_state(parser::WrappedParser, sequence, till, posi, after, state) = + iterate_state(parser.parser, sequence, till, posi, after, state) export FilterParser """ @@ -200,10 +198,10 @@ filter_result(f::Function, x) = end -@inline function _iterate(parser::FilterParser, sequence, till, posi, next_i, state) +@inline function iterate_state(parser::FilterParser, sequence, till, posi, next_i, state) r::Union{Nothing,Tuple{Int,state_type(parser.parser)}} = nothing while r === nothing - r = _iterate(parser.parser, sequence, till, posi, next_i, state) + r = iterate_state(parser.parser, sequence, till, posi, next_i, state) if r === nothing return nothing elseif !parser.state_filter(sequence, till, posi, r...) @@ -224,7 +222,7 @@ Used for dispatch in [`deepmap_parser`](@ref). 
abstract type LeafParser{S,T} <: CombinedParser{S,T} end # for convenience -_iterate(parser::LeafParser, sequence, till, posi, next_i, state::MatchState) = nothing +iterate_state(parser::LeafParser, sequence, till, posi, next_i, state::MatchState) = nothing """ @@ -243,7 +241,7 @@ abstract type NIndexParser{N,T} <: LeafParser{MatchState,T} end @inline _rightof(str,i,parser::NIndexParser{L},state) where L = _nextind(str,i,L) -@inline function _iterate(parser::NIndexParser, sequence, till, posi, next_i, state::Nothing) +@inline function iterate_state(parser::NIndexParser, sequence, till, posi, next_i, state::Nothing) posi > till && return nothing # prevents BoundsError ni = rightof(sequence,posi,parser,MatchState()) if ni <= till+1 @@ -345,8 +343,8 @@ Call `f(sequence,before_i,after_i,state,a...)` if `p` matches, with_effect(f::Function,p,a...) = SideeffectParser(f,p,a...) -@inline function _iterate(parser::SideeffectParser, sequence, till, posi, next_i, state) - r = _iterate(parser.parser, sequence, till, posi, next_i, state) +@inline function iterate_state(parser::SideeffectParser, sequence, till, posi, next_i, state) + r = iterate_state(parser.parser, sequence, till, posi, next_i, state) if r!==nothing parser.effect(sequence,posi,r...,parser.args...) 
else @@ -803,17 +801,17 @@ regex_inner(x::FlatMap) = error("regex determined at runtime!") end -function _iterate(tokf::FlatMap, str, till, posi, next_i, state::Nothing) +function iterate_state(tokf::FlatMap, str, till, posi, next_i, state::Nothing) posi = next_i - lr = _iterate(tokf.left, str, till, posi, next_i, nothing) + lr = iterate_state(tokf.left, str, till, posi, next_i, nothing) lr === nothing && return nothing next_i_ = tuple_pos(lr) rightp = parser(tokf.right(get(tokf.left, str, till, next_i_,next_i,tuple_state(lr)))) rr = nothing while rr === nothing - rr = _iterate(rightp, str, till, next_i_, next_i_, nothing) + rr = iterate_state(rightp, str, till, next_i_, next_i_, nothing) if rr === nothing - lr = _iterate(tokf.left, str, till, posi, next_i_, tuple_state(lr)) + lr = iterate_state(tokf.left, str, till, posi, next_i_, tuple_state(lr)) lr === nothing && return nothing next_i_ = tuple_pos(lr) rightp = parser(tokf.right(get(tokf.left, str, till, next_i_,posi,tuple_state(lr)))) @@ -824,16 +822,16 @@ function _iterate(tokf::FlatMap, str, till, posi, next_i, state::Nothing) nothing end -function _iterate(tokf::FlatMap, str, till, posi, next_i, state) +function iterate_state(tokf::FlatMap, str, till, posi, next_i, state) lstate,rightp,rstate = left_state(state), right_parser(state), right_state(state) next_i_=next_i posi_ = leftof(str,next_i_,rightp,rstate) rr = nothing while rr === nothing - rr = _iterate(rightp, str, till, posi_, next_i_, rstate) + rr = iterate_state(rightp, str, till, posi_, next_i_, rstate) if rr === nothing - lr = _iterate(tokf.left, str, till, posi, next_i_, lstate) + lr = iterate_state(tokf.left, str, till, posi, next_i_, lstate) lr === nothing && return nothing next_i_,lstate = lr rightp = parser(tokf.right(get(tokf.left, str, till, next_i_,posi,lstate))) @@ -1127,14 +1125,14 @@ sequence_state(statettype::Type{MatchState}) = MatchState() sequence_state(statettype::Type{<:Tuple}) = tuple( ) sequence_state(statettype::Type) = Any[] 
-function _iterate_(parser::Sequence, sequence, till, posi, next_i, states::Nothing) +function iterate_state_(parser::Sequence, sequence, till, posi, next_i, states::Nothing) length(parser.parts) == 0 && return next_i, sequence_state(state_type(parser)) sss = Vector{Any}(undef,length(parser.parts)) sss[1] = nothing - _iterate(parser, sequence, till, posi, next_i, sss, 1) + iterate_state(parser, sequence, till, posi, next_i, sss, 1) end -function _iterate_(parser::Sequence, sequence, till, posi, next_i, substate::Vector{Any}, p=length(substate)) +function iterate_state_(parser::Sequence, sequence, till, posi, next_i, substate::Vector{Any}, p=length(substate)) next_i_ = next_i part=parser.parts length(part) == 0 && return nothing @@ -1150,7 +1148,7 @@ function _iterate_(parser::Sequence, sequence, till, posi, next_i, substate::Vec if (@inbounds substate[p]) === nothing pposi[p+1] = pposi[p] end - r = _iterate(part[p], sequence, till, pposi[p], pposi[p+1], substate[p]) + r = iterate_state(part[p], sequence, till, pposi[p], pposi[p+1], substate[p]) if r === nothing prune_captures(sequence, pposi[p]) @@ -1171,11 +1169,11 @@ function _iterate_(parser::Sequence, sequence, till, posi, next_i, substate::Vec end # unambigously -@generated function _iterate(parser::Sequence{pts,sts}, sequence, till, posi, next_i, states::MatchState) where {pts<:Tuple,sts} +@generated function iterate_state(parser::Sequence{pts,sts}, sequence, till, posi, next_i, states::MatchState) where {pts<:Tuple,sts} nothing end -@generated function _iterate(parser::Sequence{pts,sts}, sequence, till, posi, next_i, states)::Union{Nothing,Tuple{Int,sts}} where {pts<:Tuple,sts} +@generated function iterate_state(parser::Sequence{pts,sts}, sequence, till, posi, next_i, states)::Union{Nothing,Tuple{Int,sts}} where {pts<:Tuple,sts} fpts = fieldtypes(pts) spts = Type[ Union{Nothing,state_type(t)} for t in fpts ] n = length(fpts) @@ -1243,7 +1241,7 @@ end $(pposi[p+1]) = $(pposi[p]) end ## TODO: gc happening in 
next line? - $(subresult[p]) = _iterate($(part[p]), sequence, till, $(pposi[p]), $(pposi[p+1]), @inbounds $(substate[p])) + $(subresult[p]) = iterate_state($(part[p]), sequence, till, $(pposi[p]), $(pposi[p+1]), @inbounds $(substate[p])) if $(subresult[p]) === nothing prune_captures(sequence,$(pposi[p])) @inbounds $(substate[p]) = nothing @@ -1542,7 +1540,7 @@ end j::Int = i state_ = state tp = t.parser - while state_length(t,state_) < t.range.stop && ( x = _iterate(t.parser,sequence, till, j, j,nothing) )!==nothing + while state_length(t,state_) < t.range.stop && ( x = iterate_state(t.parser,sequence, till, j, j,nothing) )!==nothing ## @info "rep fill..." x state_ ## e.g. match(re"(?:a|(?=b)|.)*\z","abc") j_=j @@ -1576,7 +1574,7 @@ end fill_rep(t_,sequence,till,tuple_pos(x),state_) end -function _iterate(t::Repeat, sequence, till, posi, next_i, state) +function iterate_state(t::Repeat, sequence, till, posi, next_i, state) next_i_::Int,outer_state::state_type(typeof(t)),goback::Bool = if state === nothing es = emptystate(state_type(typeof(t))) fill_rep(t,sequence,till,next_i, es) @@ -1607,7 +1605,7 @@ function _iterate(t::Repeat, sequence, till, posi, next_i, state) inner_state, outer_state=poplast!(outer_state,t.parser) posi = leftof(sequence,next_i_,t.parser,inner_state) ##state[end][1] prune_captures(sequence,posi) - x = _iterate(t.parser,sequence, till, posi, next_i_, inner_state) + x = iterate_state(t.parser,sequence, till, posi, next_i_, inner_state) x === nothing && state_length(t,outer_state) in t.range && return posi, outer_state next_i_,outer_state,goback = push_rep(t,sequence, till, posi, x, outer_state) end @@ -1621,21 +1619,21 @@ end @inline function fill_rep(t_::Lazy{<:Repeat}, sequence, till::Int, j::Int, state_) t = t_.parser tp = t.parser - while state_length(t,state_) < t.range.start && (x = _iterate(t.parser,sequence, till,j, j,nothing))!==nothing + while state_length(t,state_) < t.range.start && (x = iterate_state(t.parser,sequence, till,j, 
j,nothing))!==nothing j_=j j, state_ = fill_rep_j_state(x,state_,tp) j_==j && break end j,state_,false end -function _iterate(t_::Lazy{<:Repeat}, sequence, till, posi, next_i, state) +function iterate_state(t_::Lazy{<:Repeat}, sequence, till, posi, next_i, state) t = t_.parser next_i_::Int,state_::state_type(typeof(t)),goback::Bool = if state === nothing es = emptystate(state_type(typeof(t))) fill_rep(t_,sequence,till,next_i, es) else if state_length(t,state)next_i || state_length(t,state)==0) return fill_rep_j_state(x,state,t.parser) #tuple_pos(x),pushstate!(state,t.parser,tuple_state(x)) end @@ -1649,7 +1647,7 @@ function _iterate(t_::Lazy{<:Repeat}, sequence, till, posi, next_i, state) end lstate, state_=poplast!(state,t.parser) posi = leftof(sequence,next_i_,t.parser,lstate) ##state[end][1] - x = _iterate(t.parser,sequence, till, posi, next_i_, lstate) + x = iterate_state(t.parser,sequence, till, posi, next_i_, lstate) if x === nothing next_i_ = posi prune_captures(sequence,next_i_) @@ -1757,17 +1755,17 @@ end -function _iterate(t::Optional, str, till, posi, next_i, state::MatchState) +function iterate_state(t::Optional, str, till, posi, next_i, state::MatchState) prune_captures(str,posi) posi, NoMatch() end -_iterate(t::Optional, str, till, posi, next_i, state::NoMatch) = +iterate_state(t::Optional, str, till, posi, next_i, state::NoMatch) = nothing -function _iterate(t::Optional, str, till, posi, next_i, state) +function iterate_state(t::Optional, str, till, posi, next_i, state) posi = state === nothing ? 
next_i : leftof(str,next_i,t.parser,state) ##state[end][1] - r = _iterate(t.parser, str, till, posi, next_i, state) + r = iterate_state(t.parser, str, till, posi, next_i, state) if r === nothing prune_captures(str,posi) return tuple(posi, NoMatch()) @@ -1776,12 +1774,12 @@ function _iterate(t::Optional, str, till, posi, next_i, state) end end -_iterate(t::Lazy{<:Optional}, str, till, posi, next_i, state::Nothing) = +iterate_state(t::Lazy{<:Optional}, str, till, posi, next_i, state::Nothing) = next_i, NoMatch() -_iterate(t::Lazy{<:Optional}, str, till, posi, next_i, state::NoMatch) = - _iterate(t.parser.parser, str, till, posi, next_i, nothing) -_iterate(t::Lazy{<:Optional}, str, till, posi, next_i, state) = - _iterate(t.parser.parser, str, till, posi, next_i, state) +iterate_state(t::Lazy{<:Optional}, str, till, posi, next_i, state::NoMatch) = + iterate_state(t.parser.parser, str, till, posi, next_i, nothing) +iterate_state(t::Lazy{<:Optional}, str, till, posi, next_i, state) = + iterate_state(t.parser.parser, str, till, posi, next_i, state) @@ -2105,50 +2103,50 @@ end end -@inline function __iterate_paired(first,state,sstate::Nothing) +@inline function iterate_state_paired(first,state,sstate::Nothing) nothing end -@inline function __iterate_paired(first, state, sstate::Tuple) - __iterate_paired(first, state, sstate...) +@inline function iterate_state_paired(first, state, sstate::Tuple) + iterate_state_paired(first, state, sstate...) 
end -@inline function __iterate_paired(first, state, next_i_::Int, nstate_) +@inline function iterate_state_paired(first, state, next_i_::Int, nstate_) next_i_, with_state!(state,first,nstate_) end -function _iterate_paired(first, t, str, till, posi, next_i, state) - __iterate_paired(first, state, _iterate(t, str, till, posi, next_i, either_state_state(state))) +function iterate_state_paired(first, t, str, till, posi, next_i, state) + iterate_state_paired(first, state, iterate_state(t, str, till, posi, next_i, either_state_state(state))) end -function _iterate(t::Either{<:Vector}, str, till, posi, next_i, state::Nothing) +function iterate_state(t::Either{<:Vector}, str, till, posi, next_i, state::Nothing) r = nothing for (j,o) in enumerate(t.options) - r = _iterate_paired(j,o,str,till,posi, next_i,nothing) + r = iterate_state_paired(j,o,str,till,posi, next_i,nothing) r!== nothing && return r end nothing end -function _iterate(t::Either{<:Vector}, str, till, posi, next_i, state) +function iterate_state(t::Either{<:Vector}, str, till, posi, next_i, state) @inbounds opt = t.options[either_state_option(state)] fromindex = either_state_option(state)+1 posi = leftof(str,next_i,opt,either_state_state(state)) ##state[end][1] - r = _iterate_paired(either_state_option(state),opt,str,till,posi, next_i,state) + r = iterate_state_paired(either_state_option(state),opt,str,till,posi, next_i,state) r !== nothing && return r prune_captures(str,posi) ##sstate = nothing for j in fromindex:length(t.options) - @inbounds r2 = _iterate_paired(j,t.options[j],str,till,posi,posi,nothing) + @inbounds r2 = iterate_state_paired(j,t.options[j],str,till,posi,posi,nothing) r2 !== nothing && return r2 end nothing end -function _iterate(parser::Either{<:Tuple}, sequence, till, posi, next_i, state) +function iterate_state(parser::Either{<:Tuple}, sequence, till, posi, next_i, state) either_first(parser,posi,next_i,state) do index, option, ni, sstate - _iterate_paired(index, option, sequence, till, 
posi, ni, sstate) + iterate_state_paired(index, option, sequence, till, posi, ni, sstate) end end @@ -2166,7 +2164,7 @@ end @label $(subsearch[p]) j > $p && @goto $(subsearch[p+1]) $(subresult[p]) = f($p,$(part[p]), next_i_, sstate) - $(subresult[p]) !== nothing && return $(subresult[p])# __iterate_paired($p,state, $(subresult[p])) + $(subresult[p]) !== nothing && return $(subresult[p])# iterate_state_paired($p,state, $(subresult[p])) next_i_ = posi sstate = nothing end @@ -2212,15 +2210,15 @@ Atomic(p) = Atomic(parser(x)) regex_prefix(x::Atomic) = "(?>"*regex_prefix(x.parser) regex_suffix(x::Atomic) = regex_suffix(x.parser)*")" function Base.get(parser::Atomic, sequence, till, after, i, state::AtomicState) - a, s = _iterate(parser.parser, sequence, till, i, i, nothing) + a, s = iterate_state(parser.parser, sequence, till, i, i, nothing) get(parser.parser, sequence, till, after, i, s) end -@inline _iterate(parser::Atomic, sequence, till, posi, next_i, state::Nothing) = - _iterate(parser.parser, sequence, till, posi, next_i, state) -@inline _iterate(parser::Atomic{<:Any,AtomicState}, sequence, till, posi, next_i, state::Nothing) = - AtomicState(_iterate(parser.parser, sequence, till, posi, next_i, state)) -@inline _iterate(parser::Atomic, sequence, till, posi, next_i, state) = +@inline iterate_state(parser::Atomic, sequence, till, posi, next_i, state::Nothing) = + iterate_state(parser.parser, sequence, till, posi, next_i, state) +@inline iterate_state(parser::Atomic{<:Any,AtomicState}, sequence, till, posi, next_i, state::Nothing) = + AtomicState(iterate_state(parser.parser, sequence, till, posi, next_i, state)) +@inline iterate_state(parser::Atomic, sequence, till, posi, next_i, state) = nothing diff --git a/src/assertions.jl b/src/assertions.jl index 60f60e6..1480d87 100644 --- a/src/assertions.jl +++ b/src/assertions.jl @@ -6,7 +6,7 @@ Parsers that do not consume any input can inherit `Assertion{S,T}`. 
abstract type Assertion{S,T} <: CombinedParser{S,T} end @inline _leftof(str,i,parser::Assertion,x...) = i @inline _rightof(str,i,parser::Assertion,x...) = i -@inline _iterate(t::Assertion{MatchState}, str, till, posi, next_i, state::MatchState) = nothing +@inline iterate_state(t::Assertion{MatchState}, str, till, posi, next_i, state::MatchState) = nothing """ Base.get(parser::Assertion{MatchState, <:Assertion}, sequence, till, after, i, state) @@ -33,7 +33,7 @@ re"^" """ struct AtStart <: Assertion{MatchState,AtStart} end regex_inner(x::AtStart) = "^" -_iterate(parser::AtStart, sequence, till, posi, next_i, state::Nothing) = +iterate_state(parser::AtStart, sequence, till, posi, next_i, state::Nothing) = next_i == 1 ? (next_i, MatchState()) : nothing print_constructor(io::IO, x::AtStart) = print(io,"AtStart") @@ -51,7 +51,7 @@ re"\$" """ struct AtEnd <: Assertion{MatchState,AtEnd} end regex_inner(x::AtEnd) = "\$" -_iterate(parser::AtEnd, sequence, till, posi, next_i, state::Nothing) = +iterate_state(parser::AtEnd, sequence, till, posi, next_i, state::Nothing) = next_i > till ? 
(next_i, MatchState()) : nothing print_constructor(io::IO, x::AtEnd) = print(io,"AtEnd") @@ -73,7 +73,7 @@ struct Never <: Assertion{MatchState,Never} end regex_prefix(x::Never) = "(*" regex_inner(x::Never) = "FAIL" regex_suffix(x::Never) = ")" -_iterate(x::Never,str,posi, next_i,till,state::Nothing) = +iterate_state(x::Never,str,posi, next_i,till,state::Nothing) = nothing @@ -97,9 +97,8 @@ children(x::Union{Never,Always}) = tuple() regex_prefix(x::Always) = "" regex_inner(x::Always) = "" regex_suffix(x::Always) = "" -_iterate(parser::Always, str, till, posi, next_i, s::Nothing) = +iterate_state(parser::Always, str, till, posi, next_i, s::Nothing) = next_i, MatchState() -##_iterate(parser::Never, str, till, posi, next_i, s) = nothing Base.show(io::IO, x::Union{AtStart,AtEnd,Never,Always}) = @@ -147,8 +146,8 @@ julia> parse(la*AnyChar(),"peek") end end regex_prefix(x::PositiveLookahead) = "(?="*regex_prefix(x.parser) -function _iterate(t::PositiveLookahead, str, till, posi, next_i, state) - r = _iterate(t.parser, str, till, posi, tuple_pos(state,posi), tuple_state(state)) +function iterate_state(t::PositiveLookahead, str, till, posi, next_i, state) + r = iterate_state(t.parser, str, till, posi, tuple_pos(state,posi), tuple_state(state)) if r === nothing nothing else @@ -186,8 +185,8 @@ julia> parse(la*AnyChar(),"seek") end end regex_prefix(x::NegativeLookahead) = "(?!"*regex_prefix(x.parser) -function _iterate(t::NegativeLookahead, str, till, posi, next_i, state::Nothing) - r = _iterate(t.parser, str, till, posi, next_i, nothing) +function iterate_state(t::NegativeLookahead, str, till, posi, next_i, state::Nothing) + r = iterate_state(t.parser, str, till, posi, next_i, nothing) if r === nothing next_i,MatchState() else diff --git a/src/caseless.jl b/src/caseless.jl index 7da3722..20d0560 100644 --- a/src/caseless.jl +++ b/src/caseless.jl @@ -50,5 +50,5 @@ Match parser on [`CharMappedString`](https://github.com/gkappler/LazyStrings.jl) end 
children(x::MappedSequenceParser) = tuple(x.parser, x.f) -@inline _iterate(parser::MappedSequenceParser, sequence, till, posi,after,state) = - _iterate(parser.parser, lmap(parser.f,sequence), till,posi,after,state) +@inline iterate_state(parser::MappedSequenceParser, sequence, till, posi,after,state) = + iterate_state(parser.parser, lmap(parser.f,sequence), till,posi,after,state) diff --git a/src/constant.jl b/src/constant.jl index 0c28edb..d6a8c69 100644 --- a/src/constant.jl +++ b/src/constant.jl @@ -41,13 +41,13 @@ _lowercase(x::CombinedParser) = x _lowercase(x::ConstantParser) = ConstantParser(lowercase(x.parser)) -@inline _iterate(parser::ConstantParser, sequence, till, posi, next_i, state::Nothing) = - _iterate_constant(parser,sequence,till,posi, next_i, state) +@inline iterate_state(parser::ConstantParser, sequence, till, posi, next_i, state::Nothing) = + iterate_state_constant(parser,sequence,till,posi, next_i, state) -@inline _iterate_constant(parser::ConstantParser, sequence, till, posi, next_i, state) = - _iterate_constant(parser.parser,sequence,till,posi, next_i, state, _ncodeunits(parser)) +@inline iterate_state_constant(parser::ConstantParser, sequence, till, posi, next_i, state) = + iterate_state_constant(parser.parser,sequence,till,posi, next_i, state, _ncodeunits(parser)) -@inline function _iterate_constant(p::AbstractString, sequence, till, posi, next_i, state::Nothing,L) +@inline function iterate_state_constant(p::AbstractString, sequence, till, posi, next_i, state::Nothing,L) till, posi, next_i j::Int = next_i k::Int = 1 @@ -62,7 +62,7 @@ _lowercase(x::ConstantParser) = ConstantParser(lowercase(x.parser)) return j, MatchState() end -@inline function _iterate_constant(parser, sequence, till, posi, next_i, state::Nothing, L) +@inline function iterate_state_constant(parser, sequence, till, posi, next_i, state::Nothing, L) state !== nothing || next_i>till || next_i < 1 && return nothing if next_i<=till && ismatch(sequence[next_i],parser) next_i+L, 
MatchState() diff --git a/src/deepmap.jl b/src/deepmap.jl index a403bba..d6d579a 100644 --- a/src/deepmap.jl +++ b/src/deepmap.jl @@ -215,7 +215,7 @@ substitute(name::Symbol) = Substitution(name) substitute(name::AbstractString) = Substitution(Symbol(name)) -CombinedParsers._iterate(parser::Substitution, a...) = error(" call substitute") +CombinedParsers.iterate_state(parser::Substitution, a...) = error(" call substitute") function CombinedParsers.print_constructor(io::IO, x::Substitution) printstyled(io, x.name, color=:red) print(io, " call substitute!") diff --git a/src/indexed_captures.jl b/src/indexed_captures.jl index a3f1ebf..da43565 100644 --- a/src/indexed_captures.jl +++ b/src/indexed_captures.jl @@ -22,14 +22,14 @@ function print_constructor(io::IO, x::ParserWithCaptures) ( length(x.subroutines)>0 ? " with $(length(x.subroutines)) capturing groups" : "" ) ) end """ - _iterate(p::ParserWithCaptures, sequence::SequenceWithCaptures,a...) + iterate_state(p::ParserWithCaptures, sequence::SequenceWithCaptures,a...) `Base.empty!(sequence)` before iteration. (Why?) """ -function _iterate(p::ParserWithCaptures, sequence::SequenceWithCaptures,a...) +function iterate_state(p::ParserWithCaptures, sequence::SequenceWithCaptures,a...) Base.empty!(sequence) - _iterate(p.parser, sequence, a...) + iterate_state(p.parser, sequence, a...) end """ @@ -49,8 +49,6 @@ function ParserWithCaptures(x) isempty(r.subroutines) ? 
r.parser : r end -# _iterate(parser::ParserWithCaptures, sequence::AbstractString, till, next_i, after, state::Nothing) = -# _iterate(parser, sequence, till, next_i, next_i, state) SequenceWithCaptures(x,cs::CombinedParser) = x function SequenceWithCaptures(x,cs::ParserWithCaptures) diff --git a/src/match.jl b/src/match.jl index 9c9dcb7..8f96fdc 100644 --- a/src/match.jl +++ b/src/match.jl @@ -45,8 +45,8 @@ Base.eltype(T::Type{<:MatchesIterator{P,S}}) where {P,S} = Base.IteratorSize(::Type{<:MatchesIterator}) = Base.SizeUnknown() -@inline _iterate(mi::MatchesIterator,a...) = - _iterate(mi.parser, mi.sequence, mi.till, a...) +@inline iterate_state(mi::MatchesIterator, posi, a...) = + iterate_state(mi.parser, mi.sequence, mi.till, posi, a...) Base.get(x::MatchesIterator, a...)= get(x.parser,x.sequence,x.till, a...) @@ -152,8 +152,8 @@ end result_type(::Type{<:ParseMatch{P}}) where P = result_type(P) -@inline _iterate(m::ParseMatch) = - _iterate(m.parsings,m.offset,m.after,m.state) +@inline iterate_state(m::ParseMatch) = + iterate_state(m.parsings,m.offset,m.after,m.state) """ Base.get(x::ParseMatch{<:MatchTuple}) @@ -183,10 +183,10 @@ Base.IteratorSize(::Type{<:ParseMatch}) = Base.SizeUnknown() """ Base.iterate(x::ParseMatch[, m::ParseMatch=x]) -Returns next [`ParseMatch`](@ref) at `m.offset` after `m.state`, see [`_iterate`](@ref)(m). +Returns next [`ParseMatch`](@ref) at `m.offset` after `m.state`, see [`iterate_state`](@ref)(m). 
""" function Base.iterate(x::ParseMatch, m=x) - i = _iterate(m) + i = iterate_state(m) parsematch_tuple(m.parsings,m.offset,i) end @@ -219,12 +219,12 @@ Return first next [`ParseMatch`](@ref) (as return value and state) or `nothing` @inline function Base.iterate(m::MatchesIterator, s::ParseMatch) offset,after = s.offset, s.after stop = m.stop - state = _iterate(m,offset,after,s.state) + state = iterate_state(m,offset,after,s.state) while offset <= stop+1 && state===nothing # state = iterate(m.parsings,(offset,nothing)) offset > stop && break offset = _nextind(m.sequence,offset) - state = _iterate(m,offset,offset,nothing) + state = iterate_state(m,offset,offset,nothing) end parsematch_tuple(m,offset,state) end @@ -290,7 +290,7 @@ function Base.tryparse(p::AbstractToken, s, pos...; kw...) end function tryparse_pos(p,s, idx=firstindex(s), till=lastindex(s); kw...) - i = _iterate(wrap(p; kw...),s,till,idx,idx,nothing) + i = iterate_state(wrap(p; kw...),s,till,idx,idx,nothing) i === nothing && return nothing get(p,s,till,tuple_pos(i),1,tuple_state(i)), tuple_pos(i) end diff --git a/src/memoize.jl b/src/memoize.jl index f0bb314..e0af986 100644 --- a/src/memoize.jl +++ b/src/memoize.jl @@ -44,20 +44,14 @@ end _deepmap_parser(f::Function,mem::AbstractDict,x::MemoizingParser,a...;kw...) = MemoizingParser(deepmap_parser(f,mem,x.parser,a...;kw...)) -@inline function _iterate(parser::MemoizingParser, sequence::String, till, posi,after,state) +@inline function iterate_state(parser::MemoizingParser, sequence, till, posi,after,state) error("for memoizing, wrap sequence in WithMemory. 
Todo: automize wrapping in root parser with optimize") - _iterate(parser.parser, sequence, till,posi,after,state) + iterate_state(parser.parser, sequence, till,posi,after,state) end -@inline Base.@propagate_inbounds function _iterate(parser::MemoizingParser, sequence::WithMemory, till, posi,after,state) +@inline Base.@propagate_inbounds function iterate_state(parser::MemoizingParser, sequence::WithMemory, till, posi,after,state) get!(sequence.mem,(parser.parser,posi,state)) do - copy(_iterate(parser.parser, sequence,till,posi,after,state)) - end -end - -@inline Base.@propagate_inbounds function _iterate(parser, sequence::WithMemory, till, posi,after,state) - get!(sequence.mem,(parser,posi,state)) do - copy(_iterate(parser, sequence,till,posi,after,state)) + copy(iterate_state(parser.parser, sequence,till,posi,after,state)) end end diff --git a/src/pcre.jl b/src/pcre.jl index e7e4ff0..fb89f35 100644 --- a/src/pcre.jl +++ b/src/pcre.jl @@ -96,7 +96,7 @@ end Base.isascii(x::CharWithOptions) = isascii(x.x) Base.isprint(x::CharWithOptions) = isprint(x.x) -@inline function _iterate(p::CharWithOptions, sequence, till, posi, next_i, state::Nothing, nc=0) +@inline function iterate_state(p::CharWithOptions, sequence, till, posi, next_i, state::Nothing, nc=0) @inbounds sc,j=iterate(sequence,posi) if ismatch(p,sc) j, MatchState() @@ -292,8 +292,8 @@ end till, after, i, state) -@inline function _iterate(parser::ParserOptions, sequence, till, posi, next_i, state) - _iterate(parser.parser, +@inline function iterate_state(parser::ParserOptions, sequence, till, posi, next_i, state) + iterate_state(parser.parser, with_options(parser.set_flags,parser.unset_flags,sequence), till, posi, next_i, state) end @@ -425,8 +425,8 @@ Either( on_options(flags::Integer,p) = OnOptionsParser(parser(p),UInt32(flags)) -@inline function _iterate(parser::OnOptionsParser, sequence, till, posi, next_i, state) - _iterate(parser.parser, +@inline function iterate_state(parser::OnOptionsParser, sequence, 
till, posi, next_i, state) + iterate_state(parser.parser, (if_options(parser.flags,sequence)), till, posi, next_i, state) end diff --git a/src/re.jl b/src/re.jl index 022fee7..cec7c30 100644 --- a/src/re.jl +++ b/src/re.jl @@ -12,7 +12,7 @@ import LazyStrings: reversed, reverse_index import ..CombinedParsers: LeafParser, WrappedParser, CombinedParser, ConstantParser, Either, SideeffectParser import ..CombinedParsers: parser, prune_captures, deepmap_parser, _deepmap_parser, print_constructor -import ..CombinedParsers: _iterate, _iterate_constant +import ..CombinedParsers: iterate_state, iterate_state_constant import ..CombinedParsers: regex_prefix, regex_suffix, regex_inner, _regex_string, regex_string, _log_names import ..CombinedParsers: state_type, leftof, tuple_pos, tuple_state import ..CombinedParsers: _prevind, _nextind, _leftof, _rightof @@ -26,7 +26,7 @@ include("pcre.jl") SequenceWithCaptures ensapsulates a sequence to be parsed, and parsed captures. This struct will allow for captures a sequence-level state. -For next version, a match-level state passed as `iterate_state` argument is considered. +For next version, a match-level state passed as iterate_state argument is considered. See also [`ParserWithCaptures`](@ref) """ @@ -138,8 +138,8 @@ end Base.get(x::Capture, sequence, till, after, i, state) = get(x.parser, sequence, till, after, i, state) -@inline function _iterate(parser::Capture, sequence, till, posi, next_i, state) - r = _iterate(parser.parser, sequence, till, posi, next_i, state) +@inline function iterate_state(parser::Capture, sequence, till, posi, next_i, state) + r = iterate_state(parser.parser, sequence, till, posi, next_i, state) if r !== nothing ## set only if found (e.g. 
if repeated capture capture last) set_capture(sequence,parser.index,posi,_prevind(sequence,tuple_pos(r))) elseif state !== nothing @@ -248,17 +248,17 @@ function capture_substring(p::Backreference, sequence::SequenceWithCaptures) SubString(sequence.x, sequence.captures[index][end]) end -@inline function _iterate(p::Union{Backreference,ParserOptions{<:Backreference}}, +@inline function iterate_state(p::Union{Backreference,ParserOptions{<:Backreference}}, sequence::SequenceWithCaptures, till, posi, next_i, state::Nothing) - r = _iterate_constant( + r = iterate_state_constant( ConstantParser(capture_substring(p, sequence)), sequence, till, posi, next_i, state) r === nothing && return nothing tuple_pos(r), tuple_pos(r)-next_i end -@inline function _iterate(p::Union{Backreference,ParserOptions{<:Backreference}}, +@inline function iterate_state(p::Union{Backreference,ParserOptions{<:Backreference}}, sequence::SequenceWithCaptures, till, posi, next_i, state) return nothing @@ -267,7 +267,7 @@ end -_iterate_condition(p::Backreference, sequence, till, posi, next_i, state) = +iterate_state_condition(p::Backreference, sequence, till, posi, next_i, state) = resolve_index(p, sequence)>0 @@ -310,7 +310,7 @@ regex_inner(x::Subroutine) = "" _deepmap_parser(::Function,mem::AbstractDict,x::Subroutine) = x -function _iterate_condition(cond::Subroutine, sequence, till, posi, next_i, state) +function iterate_state_condition(cond::Subroutine, sequence, till, posi, next_i, state) sequence.state === nothing && return false if cond.name === nothing && cond.index < 0 true @@ -342,8 +342,8 @@ Index of a subroutine. index(parser::Subroutine,sequence) = parser.index <= 0 ? 
first(sequence.names[parser.name]) : parser.index -@inline function _iterate(parser::Subroutine, sequence::SequenceWithCaptures, till, posi, next_i, state) - _iterate( +@inline function iterate_state(parser::Subroutine, sequence::SequenceWithCaptures, till, posi, next_i, state) + iterate_state( sequence.subroutines[index(parser,sequence)].parser, copy_captures(sequence,parser), till, posi, next_i, state) end @@ -386,7 +386,7 @@ _deepmap_parser(f::Function,mem::AbstractDict,x::DupSubpatternNumbers, a...;kw.. export Conditional """ -Conditional parser, `_iterate` cycles conditionally on `_iterate_condition` through matches in field `yes` and `no` respectively. +Conditional parser, `iterate_state` cycles conditionally on `iterate_state_condition` through matches in field `yes` and `no` respectively. """ @auto_hash_equals struct Conditional{C,Y,N,S,T} <: CombinedParser{S,T} condition::C @@ -419,10 +419,10 @@ end @inline Base.get(parser::Conditional, sequence, till, after, i, state) = get(state.first == :yes ? parser.yes : parser.no, sequence, till, after, i, state.second) -_iterate_condition(cond::WrappedParser, sequence, till, posi, next_i, state) = - _iterate_condition(cond.parser, sequence, till, posi, next_i, state) -_iterate_condition(cond, sequence, till, posi, next_i, state) = - _iterate(cond, sequence, till, posi, next_i, state) !== nothing +iterate_state_condition(cond::WrappedParser, sequence, till, posi, next_i, state) = + iterate_state_condition(cond.parser, sequence, till, posi, next_i, state) +iterate_state_condition(cond, sequence, till, posi, next_i, state) = + iterate_state(cond, sequence, till, posi, next_i, state) !== nothing @@ -434,17 +434,17 @@ end rightof(str,i,state.first == :yes ? 
parser.yes : parser.no, state.second) end -@inline function _iterate(parser::Conditional, sequence, till, posi, next_i, state::Nothing) - c = _iterate_condition(parser.condition, sequence, till, posi, next_i, state) +@inline function iterate_state(parser::Conditional, sequence, till, posi, next_i, state::Nothing) + c = iterate_state_condition(parser.condition, sequence, till, posi, next_i, state) cparse = c ? parser.yes : parser.no - s = _iterate(cparse, + s = iterate_state(cparse, sequence, till, posi, next_i, state) s === nothing && return nothing tuple_pos(s), (c ? :yes : :no) => tuple_state(s) end -@inline function _iterate(parser::Conditional, sequence, till, posi, next_i, state::Pair) - _iterate(state.first == :yes ? parser.yes : parser.no, sequence, till, posi, next_i, state.second) +@inline function iterate_state(parser::Conditional, sequence, till, posi, next_i, state::Pair) + iterate_state(state.first == :yes ? parser.yes : parser.no, sequence, till, posi, next_i, state.second) end include("indexed_captures.jl") diff --git a/src/reverse.jl b/src/reverse.jl index 3f5d015..8fef071 100644 --- a/src/reverse.jl +++ b/src/reverse.jl @@ -70,11 +70,11 @@ end children(x::PositiveLookbehind) = children(x.parser) -function _iterate(t::NegativeLookbehind, str, till, posi, next_i, state::Nothing) +function iterate_state(t::NegativeLookbehind, str, till, posi, next_i, state::Nothing) rseq=reversed(str) next_i < 1 && return next_i, MatchState() p = reverse_index(rseq,_prevind(str,next_i)) - r = _iterate(t.parser, rseq, till, p, p, nothing) + r = iterate_state(t.parser, rseq, till, p, p, nothing) if r === nothing next_i,MatchState() else @@ -83,14 +83,14 @@ function _iterate(t::NegativeLookbehind, str, till, posi, next_i, state::Nothing end -_iterate(t::PositiveLookbehind, str, till, posi, next_i, state::MatchState) = +iterate_state(t::PositiveLookbehind, str, till, posi, next_i, state::MatchState) = nothing -function _iterate(t::PositiveLookbehind, str, till, posi, 
next_i, state) +function iterate_state(t::PositiveLookbehind, str, till, posi, next_i, state) rseq=reversed(str) ri = reverse_index(rseq,_prevind(str,next_i)) next_i < 1 && return nothing - r = _iterate(t.parser, rseq, till, ri, tuple_pos(state,ri), tuple_state(state)) + r = iterate_state(t.parser, rseq, till, ri, tuple_pos(state,ri), tuple_state(state)) if r === nothing nothing else diff --git a/src/state.jl b/src/state.jl index 64667d2..55c93e2 100644 --- a/src/state.jl +++ b/src/state.jl @@ -45,7 +45,7 @@ Base.convert(::Type{NCodeunitsState{S}}, x::NCodeunitsState) where S = """ tuple_pos(pos_state::Tuple) -[`_iterate`](@ref) returns a tuple `pos_state` or nothing, and +[`iterate_state`](@ref) returns a tuple `pos_state` or nothing, and `pos_state[1]` is position after match. """ @inline tuple_pos(pos_state::Tuple, default...) = pos_state[1] @@ -54,7 +54,7 @@ Base.convert(::Type{NCodeunitsState{S}}, x::NCodeunitsState) where S = """ tuple_state(pos_state::Tuple) -[`_iterate`](@ref) returns a tuple `pos_state` or nothing, and +[`iterate_state`](@ref) returns a tuple `pos_state` or nothing, and `pos_state[2]` is the state of match. 
""" @inline tuple_state(pos_state::Tuple) = pos_state[2] diff --git a/src/textparse.jl b/src/textparse.jl index d237e16..c68c767 100644 --- a/src/textparse.jl +++ b/src/textparse.jl @@ -59,13 +59,13 @@ print_constructor(io::IO, x::AbstractTokenParser) = print_constructor(io::IO, x::AbstractTokenParser{<:TextParse.DateTimeToken}) = print(io, x.parser.format) -_iterate(parser::AbstractTokenParser, sequence, till, before_i, next_i, state) = - _iterate_token(parser.parser, sequence, till, before_i, next_i, state) +iterate_state(parser::AbstractTokenParser, sequence, till, before_i, next_i, state) = + iterate_state_token(parser.parser, sequence, till, before_i, next_i, state) -_iterate_token(parser::AbstractToken, sequence, till, before_i, next_i, state) = +iterate_state_token(parser::AbstractToken, sequence, till, before_i, next_i, state) = nothing -function _iterate_token(parser::AbstractToken, sequence, till, before_i, next_i, state::Nothing, opts=TextParse.default_opts) +function iterate_state_token(parser::AbstractToken, sequence, till, before_i, next_i, state::Nothing, opts=TextParse.default_opts) r,next_i_ = tryparsenext(parser, sequence, next_i, till,opts) if isnull(r) nothing @@ -101,7 +101,7 @@ julia> TextParse.tryparsenext(p, "Number: 42") """ function TextParse.tryparsenext(x::CombinedParser,str,i,till,opts=TextParse.default_opts) - s = _iterate(x,str,till,i,nothing) + s = iterate_state(x,str,till,i,nothing) if s === nothing Nullable{result_type(x)}(),i else diff --git a/src/trie.jl b/src/trie.jl index 5e88714..43d2121 100644 --- a/src/trie.jl +++ b/src/trie.jl @@ -27,15 +27,15 @@ function Either(x::Dict) end either_state_type(T::Type{<:Trie}) = NCodeunitsState{T} -@inline _iterate(p::Either{<:AbstractTrie}, str, till, posi, next_i, state) = - _iterate(p.options, str, till, posi, next_i, state) +@inline iterate_state(p::Either{<:AbstractTrie}, str, till, posi, next_i, state) = + iterate_state(p.options, str, till, posi, next_i, state) """ - 
_iterate(p::AbstractTrie{Char}, str, till, posi, next_i, ::Nothing) + iterate_state(p::AbstractTrie{Char}, str, till, posi, next_i, ::Nothing) Match char path in `p` greedily, recording `SubTrie` in a [`NCodeunitsState`](@ref). """ -@inline function _iterate(p::AbstractTrie{Char}, str, till, posi, next_i, state::Nothing) +@inline function iterate_state(p::AbstractTrie{Char}, str, till, posi, next_i, state::Nothing) ni = ni_ = posi st = st_ = p while st !== nothing && ni <= till @@ -54,8 +54,8 @@ Match char path in `p` greedily, recording `SubTrie` in a [`NCodeunitsState`](@r end end -@inline _iterate(p::AbstractTrie{Char}, str, till, posi, next_i, state) = - _iterate(p, str, _prevind(str,next_i,2), posi, posi, nothing) +@inline iterate_state(p::AbstractTrie{Char}, str, till, posi, next_i, state) = + iterate_state(p, str, _prevind(str,next_i,2), posi, posi, nothing) # disambiguation @inline _rightof(str,i,parser::Either{<:AbstractTrie},x::NCodeunitsState) = diff --git a/src/valuematcher.jl b/src/valuematcher.jl index 549a0ea..f6d2d9c 100644 --- a/src/valuematcher.jl +++ b/src/valuematcher.jl @@ -69,11 +69,11 @@ regex_prefix(x::AnyValue) = "" regex_suffix(x::AnyValue) = "" """ - _iterate(parser::ValueMatcher, sequence, till, posi, next_i, state::Nothing) + iterate_state(parser::ValueMatcher, sequence, till, posi, next_i, state::Nothing) When implementing a `Custom<:ValueMatcher` it suffices to provide a method [`CombinedParsers._ismatch`](@ref)`(c, parser::Custom)`. 
""" -@inline function _iterate(parser::ValueMatcher, sequence, till, posi, next_i, state::Nothing) +@inline function iterate_state(parser::ValueMatcher, sequence, till, posi, next_i, state::Nothing) next_i>till && return nothing @inbounds c,ni = sequence[next_i], _nextind(sequence, next_i) !ismatch(c,parser) && return nothing diff --git a/test/test-parser.jl b/test/test-parser.jl index e7ef1bc..ae3cc13 100644 --- a/test/test-parser.jl +++ b/test/test-parser.jl @@ -47,7 +47,7 @@ end @testset "Bytes" begin # simple test for binary parsing - _iterate(Bytes(1,UInt8),[0x33],1,1,1,nothing) + iterate_state(Bytes(1,UInt8),[0x33],1,1,1,nothing) @test parse(Bytes(1,UInt8),[0x33]) == 0x33 @test parse(Bytes(2,UInt16),[0x33,0x66]) == 0x6633 @test parse(Bytes(4,Float32),[0x55,0x77,0x33,0x66]) == reinterpret(Float32,0x66337755)